[09/15] mahout git commit: NO-JIRA Trevors updates

rawkintrevo Sat, 08 Sep 2018 16:36:02 -0700

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/SparseRowMatrix.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mahout/math/SparseRowMatrix.java 
b/core/src/main/java/org/apache/mahout/math/SparseRowMatrix.java
new file mode 100644
index 0000000..ee54ad0
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/SparseRowMatrix.java
@@ -0,0 +1,289 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import org.apache.mahout.math.flavor.MatrixFlavor;
+import org.apache.mahout.math.flavor.TraversingStructureEnum;
+import org.apache.mahout.math.function.DoubleDoubleFunction;
+import org.apache.mahout.math.function.Functions;
+import org.slf4j.Logger;
+import org.slf4j.LoggerFactory;
+
+import java.util.Iterator;
+
+/**
+ * sparse matrix with general element values whose rows are accessible 
quickly. Implemented as a row
+ * array of either SequentialAccessSparseVectors or RandomAccessSparseVectors.
+ */
+public class SparseRowMatrix extends AbstractMatrix {
+  private Vector[] rowVectors;
+
+  private final boolean randomAccessRows;
+
+  private static final Logger log = 
LoggerFactory.getLogger(SparseRowMatrix.class);
+
+  /**
+   * Construct a sparse matrix starting with the provided row vectors.
+   *
+   * @param rows       The number of rows in the result
+   * @param columns    The number of columns in the result
+   * @param rowVectors a Vector[] array of rows
+   */
+  public SparseRowMatrix(int rows, int columns, Vector[] rowVectors) {
+    this(rows, columns, rowVectors, false, rowVectors instanceof 
RandomAccessSparseVector[]);
+  }
+
+  public SparseRowMatrix(int rows, int columns, boolean randomAccess) {
+    this(rows, columns, randomAccess
+        ? new RandomAccessSparseVector[rows]
+        : new SequentialAccessSparseVector[rows],
+      true,
+      randomAccess);
+  }
+
+  public SparseRowMatrix(int rows, int columns, Vector[] vectors, boolean 
shallowCopy, boolean randomAccess) {
+    super(rows, columns);
+    this.randomAccessRows = randomAccess;
+    this.rowVectors = vectors.clone();
+    for (int row = 0; row < rows; row++) {
+      if (vectors[row] == null) {
+        // TODO: this can't be right to change the argument
+        vectors[row] = randomAccess
+          ? new RandomAccessSparseVector(numCols(), 10)
+          : new SequentialAccessSparseVector(numCols(), 10);
+      }
+      this.rowVectors[row] = shallowCopy ? vectors[row] : vectors[row].clone();
+    }
+  }
+
+  /**
+   * Construct a matrix of the given cardinality, with rows defaulting to 
RandomAccessSparseVector
+   * implementation
+   *
+   * @param rows      Number of rows in result
+   * @param columns   Number of columns in result
+   */
+  public SparseRowMatrix(int rows, int columns) {
+    this(rows, columns, true);
+  }
+
+  @Override
+  public Matrix clone() {
+    SparseRowMatrix clone = (SparseRowMatrix) super.clone();
+    clone.rowVectors = new Vector[rowVectors.length];
+    for (int i = 0; i < rowVectors.length; i++) {
+      clone.rowVectors[i] = rowVectors[i].clone();
+    }
+    return clone;
+  }
+
+  @Override
+  public double getQuick(int row, int column) {
+    return rowVectors[row] == null ? 0.0 : rowVectors[row].getQuick(column);
+  }
+
+  @Override
+  public Matrix like() {
+    return new SparseRowMatrix(rowSize(), columnSize(), randomAccessRows);
+  }
+
+  @Override
+  public Matrix like(int rows, int columns) {
+    return new SparseRowMatrix(rows, columns, randomAccessRows);
+  }
+
+  @Override
+  public void setQuick(int row, int column, double value) {
+    rowVectors[row].setQuick(column, value);
+  }
+
+  @Override
+  public int[] getNumNondefaultElements() {
+    int[] result = new int[2];
+    result[ROW] = rowVectors.length;
+    for (int row = 0; row < rowSize(); row++) {
+      result[COL] = Math.max(result[COL], 
rowVectors[row].getNumNondefaultElements());
+    }
+    return result;
+  }
+
+  @Override
+  public Matrix viewPart(int[] offset, int[] size) {
+    if (offset[ROW] < 0) {
+      throw new IndexException(offset[ROW], rowVectors.length);
+    }
+    if (offset[ROW] + size[ROW] > rowVectors.length) {
+      throw new IndexException(offset[ROW] + size[ROW], rowVectors.length);
+    }
+    if (offset[COL] < 0) {
+      throw new IndexException(offset[COL], rowVectors[ROW].size());
+    }
+    if (offset[COL] + size[COL] > rowVectors[ROW].size()) {
+      throw new IndexException(offset[COL] + size[COL], 
rowVectors[ROW].size());
+    }
+    return new MatrixView(this, offset, size);
+  }
+
+  @Override
+  public Matrix assign(Matrix other, DoubleDoubleFunction function) {
+    int rows = rowSize();
+    if (rows != other.rowSize()) {
+      throw new CardinalityException(rows, other.rowSize());
+    }
+    int columns = columnSize();
+    if (columns != other.columnSize()) {
+      throw new CardinalityException(columns, other.columnSize());
+    }
+    for (int row = 0; row < rows; row++) {
+      try {
+        Iterator<Vector.Element> sparseRowIterator = 
((SequentialAccessSparseVector) this.rowVectors[row])
+                .iterateNonZero();
+        if (function.isLikeMult()) { // TODO: is this a sufficient test?
+          // TODO: this may cause an exception if the row type is not 
compatible but it is currently guaranteed to be
+          // a SequentialAccessSparseVector, should "try" here just in case 
and Warn
+          // TODO: can we use iterateNonZero on both rows until the index is 
the same to get better speedup?
+
+          // TODO: SASVs have an iterateNonZero that returns zeros, this 
should not hurt but is far from optimal
+          // this might perform much better if SparseRowMatrix were backed by 
RandomAccessSparseVectors, which
+          // are backed by fastutil hashmaps and the iterateNonZero actually 
does only return nonZeros.
+          while (sparseRowIterator.hasNext()) {
+            Vector.Element element = sparseRowIterator.next();
+            int col = element.index();
+            setQuick(row, col, function.apply(element.get(), 
other.getQuick(row, col)));
+          }
+        } else {
+          for (int col = 0; col < columns; col++) {
+            setQuick(row, col, function.apply(getQuick(row, col), 
other.getQuick(row, col)));
+          }
+        }
+
+      } catch (ClassCastException e) {
+        // Warn and use default implementation
+        log.warn("Error casting the row to SequentialAccessSparseVector, this 
should never happen because" +
+                "SparseRomMatrix is always made of 
SequentialAccessSparseVectors. Proceeding with non-optimzed" +
+                "implementation.");
+        for (int col = 0; col < columns; col++) {
+          setQuick(row, col, function.apply(getQuick(row, col), 
other.getQuick(row, col)));
+        }
+      }
+    }
+    return this;
+  }
+
+  @Override
+  public Matrix assignColumn(int column, Vector other) {
+    if (rowSize() != other.size()) {
+      throw new CardinalityException(rowSize(), other.size());
+    }
+    if (column < 0 || column >= columnSize()) {
+      throw new IndexException(column, columnSize());
+    }
+    for (int row = 0; row < rowSize(); row++) {
+      rowVectors[row].setQuick(column, other.getQuick(row));
+    }
+    return this;
+  }
+
+  @Override
+  public Matrix assignRow(int row, Vector other) {
+    if (columnSize() != other.size()) {
+      throw new CardinalityException(columnSize(), other.size());
+    }
+    if (row < 0 || row >= rowSize()) {
+      throw new IndexException(row, rowSize());
+    }
+    rowVectors[row].assign(other);
+    return this;
+  }
+
+  /**
+   * @param row an int row index
+   * @return a shallow view of the Vector at specified row (ie you may mutate 
the original matrix
+   * using this row)
+   */
+  @Override
+  public Vector viewRow(int row) {
+    if (row < 0 || row >= rowSize()) {
+      throw new IndexException(row, rowSize());
+    }
+    return rowVectors[row];
+  }
+
+  @Override
+  public Matrix transpose() {
+    SparseColumnMatrix scm = new SparseColumnMatrix(columns, rows);
+    for (int i = 0; i < rows; i++) {
+      Vector row = rowVectors[i];
+      if (row.getNumNonZeroElements() > 0) {
+        scm.assignColumn(i, row);
+      }
+    }
+    return scm;
+  }
+
+  @Override
+  public Matrix times(Matrix other) {
+    if (columnSize() != other.rowSize()) {
+      throw new CardinalityException(columnSize(), other.rowSize());
+    }
+
+    if (other instanceof SparseRowMatrix) {
+      SparseRowMatrix y = (SparseRowMatrix) other;
+      SparseRowMatrix result = (SparseRowMatrix) like(rowSize(), 
other.columnSize());
+
+      for (int i = 0; i < rows; i++) {
+        Vector row = rowVectors[i];
+        for (Vector.Element element : row.nonZeroes()) {
+          result.rowVectors[i].assign(y.rowVectors[element.index()], 
Functions.plusMult(element.get()));
+        }
+      }
+      return result;
+    } else {
+      if (other.viewRow(0).isDense()) {
+        // result is dense, but can be computed relatively cheaply
+        Matrix result = other.like(rowSize(), other.columnSize());
+
+        for (int i = 0; i < rows; i++) {
+          Vector row = rowVectors[i];
+          Vector r = new DenseVector(other.columnSize());
+          for (Vector.Element element : row.nonZeroes()) {
+            r.assign(other.viewRow(element.index()), 
Functions.plusMult(element.get()));
+          }
+          result.viewRow(i).assign(r);
+        }
+        return result;
+      } else {
+        // other is sparse, but not something we understand intimately
+        SparseRowMatrix result = (SparseRowMatrix) like(rowSize(), 
other.columnSize());
+
+        for (int i = 0; i < rows; i++) {
+          Vector row = rowVectors[i];
+          for (Vector.Element element : row.nonZeroes()) {
+            result.rowVectors[i].assign(other.viewRow(element.index()), 
Functions.plusMult(element.get()));
+          }
+        }
+        return result;
+      }
+    }
+  }
+
+  @Override
+  public MatrixFlavor getFlavor() {
+    return MatrixFlavor.SPARSELIKE;
+  }
+}


http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/Swapper.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mahout/math/Swapper.java 
b/core/src/main/java/org/apache/mahout/math/Swapper.java
new file mode 100644
index 0000000..1ca3744
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/Swapper.java
@@ -0,0 +1,35 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+/*
+Copyright 1999 CERN - European Organization for Nuclear Research.
+Permission to use, copy, modify, distribute and sell this software and its 
documentation for any purpose 
+is hereby granted without fee, provided that the above copyright notice appear 
in all copies and 
+that both that copyright notice and this permission notice appear in 
supporting documentation. 
+CERN makes no representations about the suitability of this software for any 
purpose. 
+It is provided "as is" without expressed or implied warranty.
+*/
+package org.apache.mahout.math;
+
+/**
+ * Interface for an object that knows how to swap elements at two positions 
(a,b).
+ */
+public interface Swapper {
+
+  /** Swaps the generic data g[a] with g[b]. */
+  void swap(int a, int b);
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/TransposedMatrixView.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/mahout/math/TransposedMatrixView.java 
b/core/src/main/java/org/apache/mahout/math/TransposedMatrixView.java
new file mode 100644
index 0000000..ede6f35
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/TransposedMatrixView.java
@@ -0,0 +1,147 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import org.apache.mahout.math.flavor.BackEnum;
+import org.apache.mahout.math.flavor.MatrixFlavor;
+import org.apache.mahout.math.flavor.TraversingStructureEnum;
+import org.apache.mahout.math.function.DoubleDoubleFunction;
+import org.apache.mahout.math.function.DoubleFunction;
+
+/**
+ * Matrix View backed by an {@link 
org.apache.mahout.math.function.IntIntFunction}
+ */
+public class TransposedMatrixView extends AbstractMatrix {
+
+  private Matrix m;
+
+  public TransposedMatrixView(Matrix m) {
+    super(m.numCols(), m.numRows());
+    this.m = m;
+  }
+
+  @Override
+  public Matrix assignColumn(int column, Vector other) {
+    m.assignRow(column,other);
+    return this;
+  }
+
+  @Override
+  public Matrix assignRow(int row, Vector other) {
+    m.assignColumn(row,other);
+    return this;
+  }
+
+  @Override
+  public double getQuick(int row, int column) {
+    return m.getQuick(column,row);
+  }
+
+  @Override
+  public Matrix like() {
+    return m.like(rows, columns);
+  }
+
+  @Override
+  public Matrix like(int rows, int columns) {
+    return m.like(rows,columns);
+  }
+
+  @Override
+  public void setQuick(int row, int column, double value) {
+    m.setQuick(column, row, value);
+  }
+
+  @Override
+  public Vector viewRow(int row) {
+    return m.viewColumn(row);
+  }
+
+  @Override
+  public Vector viewColumn(int column) {
+    return m.viewRow(column);
+  }
+
+  @Override
+  public Matrix assign(double value) {
+    return m.assign(value);
+  }
+
+  @Override
+  public Matrix assign(Matrix other, DoubleDoubleFunction function) {
+    if (other instanceof TransposedMatrixView) {
+      m.assign(((TransposedMatrixView) other).m, function);
+    } else {
+      m.assign(new TransposedMatrixView(other), function);
+    }
+    return this;
+  }
+
+  @Override
+  public Matrix assign(Matrix other) {
+    if (other instanceof TransposedMatrixView) {
+      return m.assign(((TransposedMatrixView) other).m);
+    } else {
+      return m.assign(new TransposedMatrixView(other));
+    }
+  }
+
+  @Override
+  public Matrix assign(DoubleFunction function) {
+    return m.assign(function);
+  }
+
+  @Override
+  public MatrixFlavor getFlavor() {
+    return flavor;
+  }
+
+  private MatrixFlavor flavor = new MatrixFlavor() {
+    @Override
+    public BackEnum getBacking() {
+      return m.getFlavor().getBacking();
+    }
+
+    @Override
+    public TraversingStructureEnum getStructure() {
+      TraversingStructureEnum flavor = m.getFlavor().getStructure();
+      switch (flavor) {
+        case COLWISE:
+          return TraversingStructureEnum.ROWWISE;
+        case SPARSECOLWISE:
+          return TraversingStructureEnum.SPARSEROWWISE;
+        case ROWWISE:
+          return TraversingStructureEnum.COLWISE;
+        case SPARSEROWWISE:
+          return TraversingStructureEnum.SPARSECOLWISE;
+        default:
+          return flavor;
+      }
+    }
+
+    @Override
+    public boolean isDense() {
+      return m.getFlavor().isDense();
+    }
+  };
+
+  Matrix getDelegate() {
+    return m;
+  }
+
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/UpperTriangular.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mahout/math/UpperTriangular.java 
b/core/src/main/java/org/apache/mahout/math/UpperTriangular.java
new file mode 100644
index 0000000..29fa6a0
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/UpperTriangular.java
@@ -0,0 +1,160 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import org.apache.mahout.math.flavor.BackEnum;
+import org.apache.mahout.math.flavor.MatrixFlavor;
+import org.apache.mahout.math.flavor.TraversingStructureEnum;
+
+/**
+ * 
+ * Quick and dirty implementation of some {@link 
org.apache.mahout.math.Matrix} methods
+ * over packed upper triangular matrix.
+ *
+ */
+public class UpperTriangular extends AbstractMatrix {
+
+  private static final double EPSILON = 1.0e-12; // assume anything less than
+                                                 // that to be 0 during
+                                                 // non-upper assignments
+
+  private double[] values;
+
+  /**
+   * represents n x n upper triangular matrix
+   * 
+   * @param n
+   */
+
+  public UpperTriangular(int n) {
+    super(n, n);
+    values = new double[n * (n + 1) / 2];
+  }
+
+  public UpperTriangular(double[] data, boolean shallow) {
+    this(elementsToMatrixSize(data != null ? data.length : 0));
+    if (data == null) {
+      throw new IllegalArgumentException("data");
+    }
+    values = shallow ? data : data.clone();
+  }
+
+  public UpperTriangular(Vector data) {
+    this(elementsToMatrixSize(data.size()));
+
+    for (Vector.Element el:data.nonZeroes()) {
+      values[el.index()] = el.get();
+    }
+  }
+
+  private static int elementsToMatrixSize(int dataSize) {
+    return (int) Math.round((-1 + Math.sqrt(1 + 8 * dataSize)) / 2);
+  }
+
+  // copy-constructor
+  public UpperTriangular(UpperTriangular mx) {
+    this(mx.values, false);
+  }
+
+  @Override
+  public Matrix assignColumn(int column, Vector other) {
+    if (columnSize() != other.size()) {
+      throw new IndexException(columnSize(), other.size());
+    }
+    if (other.viewPart(column + 1, other.size() - column - 1).norm(1) > 
1.0e-14) {
+      throw new IllegalArgumentException("Cannot set lower portion of 
triangular matrix to non-zero");
+    }
+    for (Vector.Element element : other.viewPart(0, column).all()) {
+      setQuick(element.index(), column, element.get());
+    }
+    return this;
+  }
+
+  @Override
+  public Matrix assignRow(int row, Vector other) {
+    if (columnSize() != other.size()) {
+      throw new IndexException(numCols(), other.size());
+    }
+    for (int i = 0; i < row; i++) {
+      if (Math.abs(other.getQuick(i)) > EPSILON) {
+        throw new IllegalArgumentException("non-triangular source");
+      }
+    }
+    for (int i = row; i < rows; i++) {
+      setQuick(row, i, other.get(i));
+    }
+    return this;
+  }
+
+  public Matrix assignNonZeroElementsInRow(int row, double[] other) {
+    System.arraycopy(other, row, values, getL(row, row), rows - row);
+    return this;
+  }
+
+  @Override
+  public double getQuick(int row, int column) {
+    if (row > column) {
+      return 0;
+    }
+    int i = getL(row, column);
+    return values[i];
+  }
+
+  private int getL(int row, int col) {
+    /*
+     * each row starts with some zero elements that we don't store. this
+     * accumulates an offset of (row+1)*row/2
+     */
+    return col + row * numCols() - (row + 1) * row / 2;
+  }
+
+  @Override
+  public Matrix like() {
+    return like(rowSize(), columnSize());
+  }
+
+  @Override
+  public Matrix like(int rows, int columns) {
+    return new DenseMatrix(rows, columns);
+  }
+
+  @Override
+  public void setQuick(int row, int column, double value) {
+    values[getL(row, column)] = value;
+  }
+
+  @Override
+  public int[] getNumNondefaultElements() {
+    throw new UnsupportedOperationException();
+  }
+
+  @Override
+  public Matrix viewPart(int[] offset, int[] size) {
+    return new MatrixView(this, offset, size);
+  }
+
+  public double[] getData() {
+    return values;
+  }
+
+  @Override
+  public MatrixFlavor getFlavor() {
+    // We kind of consider ourselves a vector-backed but dense matrix for 
mmul, etc. purposes.
+    return new MatrixFlavor.FlavorImpl(BackEnum.JVMMEM, 
TraversingStructureEnum.VECTORBACKED, true);
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/Vector.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mahout/math/Vector.java 
b/core/src/main/java/org/apache/mahout/math/Vector.java
new file mode 100644
index 0000000..c3b1dc9
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/Vector.java
@@ -0,0 +1,434 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+
+import org.apache.mahout.math.function.DoubleDoubleFunction;
+import org.apache.mahout.math.function.DoubleFunction;
+
+/**
+ * The basic interface including numerous convenience functions <p> NOTE: All 
implementing classes must have a
+ * constructor that takes an int for cardinality and a no-arg constructor that 
can be used for marshalling the Writable
+ * instance <p> NOTE: Implementations may choose to reuse the Vector.Element 
in the Iterable methods
+ */
+public interface Vector extends Cloneable {
+
+  /** @return a formatted String suitable for output */
+  String asFormatString();
+
+  /**
+   * Assign the value to all elements of the receiver
+   *
+   * @param value a double value
+   * @return the modified receiver
+   */
+  Vector assign(double value);
+
+  /**
+   * Assign the values to the receiver
+   *
+   * @param values a double[] of values
+   * @return the modified receiver
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Vector assign(double[] values);
+
+  /**
+   * Assign the other vector values to the receiver
+   *
+   * @param other a Vector
+   * @return the modified receiver
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Vector assign(Vector other);
+
+  /**
+   * Apply the function to each element of the receiver
+   *
+   * @param function a DoubleFunction to apply
+   * @return the modified receiver
+   */
+  Vector assign(DoubleFunction function);
+
+  /**
+   * Apply the function to each element of the receiver and the corresponding 
element of the other argument
+   *
+   * @param other    a Vector containing the second arguments to the function
+   * @param function a DoubleDoubleFunction to apply
+   * @return the modified receiver
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Vector assign(Vector other, DoubleDoubleFunction function);
+
+  /**
+   * Apply the function to each element of the receiver, using the y value as 
the second argument of the
+   * DoubleDoubleFunction
+   *
+   * @param f a DoubleDoubleFunction to be applied
+   * @param y a double value to be argument to the function
+   * @return the modified receiver
+   */
+  Vector assign(DoubleDoubleFunction f, double y);
+
+  /**
+   * Return the cardinality of the recipient (the maximum number of values)
+   *
+   * @return an int
+   */
+  int size();
+
+  /**
+   * true if this implementation should be considered dense -- that it 
explicitly
+   *  represents every value
+   *
+   * @return true or false
+   */
+  boolean isDense();
+
+  /**
+   * true if this implementation should be considered to be iterable in index 
order in an efficient way.
+   * In particular this implies that {@link #all()} and {@link #nonZeroes()} 
()} return elements
+   * in ascending order by index.
+   *
+   * @return true iff this implementation should be considered to be iterable 
in index order in an efficient way.
+   */
+  boolean isSequentialAccess();
+
+  /**
+   * Return a copy of the recipient
+   *
+   * @return a new Vector
+   */
+  @SuppressWarnings("CloneDoesntDeclareCloneNotSupportedException")
+  Vector clone();
+
+  Iterable<Element> all();
+
+  Iterable<Element> nonZeroes();
+
+  /**
+   * Return an object of Vector.Element representing an element of this 
Vector. Useful when designing new iterator
+   * types.
+   *
+   * @param index Index of the Vector.Element required
+   * @return The Vector.Element Object
+   */
+  Element getElement(int index);
+
+  /**
+   * Merge a set of (index, value) pairs into the vector.
+   * @param updates an ordered mapping of indices to values to be merged in.
+   */
+  void mergeUpdates(OrderedIntDoubleMapping updates);
+
+  /**
+   * A holder for information about a specific item in the Vector. <p>
+   * When using with an Iterator, the implementation
+   * may choose to reuse this element, so you may need to make a copy if you 
want to keep it
+   */
+  interface Element {
+
+    /** @return the value of this vector element. */
+    double get();
+
+    /** @return the index of this vector element. */
+    int index();
+
+    /** @param value Set the current element to value. */
+    void set(double value);
+  }
+
+  /**
+   * Return a new vector containing the values of the recipient divided by the 
argument
+   *
+   * @param x a double value
+   * @return a new Vector
+   */
+  Vector divide(double x);
+
+  /**
+   * Return the dot product of the recipient and the argument
+   *
+   * @param x a Vector
+   * @return a new Vector
+   * @throws CardinalityException if the cardinalities differ
+   */
+  double dot(Vector x);
+
+  /**
+   * Return the value at the given index
+   *
+   * @param index an int index
+   * @return the double at the index
+   * @throws IndexException if the index is out of bounds
+   */
+  double get(int index);
+
+  /**
+   * Return the value at the given index, without checking bounds
+   *
+   * @param index an int index
+   * @return the double at the index
+   */
+  double getQuick(int index);
+
+  /**
+   * Return an empty vector of the same underlying class as the receiver
+   *
+   * @return a Vector
+   */
+  Vector like();
+
+  /**
+   * Return a new empty vector of the same underlying class as the receiver 
with given cardinality
+   *
+   * @param cardinality - size of vector
+   * @return {@link Vector}
+   */
+  Vector like(int cardinality);
+
+  /**
+   * Return a new vector containing the element by element difference of the 
recipient and the argument
+   *
+   * @param x a Vector
+   * @return a new Vector
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Vector minus(Vector x);
+
+  /**
+   * Return a new vector containing the normalized (L_2 norm) values of the 
recipient
+   *
+   * @return a new Vector
+   */
+  Vector normalize();
+
+  /**
+   * Return a new Vector containing the normalized (L_power norm) values of 
the recipient. <p>
+   * See
+   * http://en.wikipedia.org/wiki/Lp_space <p>
+   * Technically, when {@code 0 < power < 1}, we don't have a norm, just a 
metric,
+   * but we'll overload this here. <p>
+   * Also supports {@code power == 0} (number of non-zero elements) and power 
= {@link
+   * Double#POSITIVE_INFINITY} (max element). Again, see the Wikipedia page 
for more info
+   *
+   * @param power The power to use. Must be >= 0. May also be {@link 
Double#POSITIVE_INFINITY}. See the Wikipedia link
+   *              for more on this.
+   * @return a new Vector x such that norm(x, power) == 1
+   */
+  Vector normalize(double power);
+
+  /**
+   * Return a new vector containing the log(1 + entry)/ L_2 norm  values of 
the recipient
+   *
+   * @return a new Vector
+   */
+  Vector logNormalize();
+
+  /**
+   * Return a new Vector with a normalized value calculated as log_power(1 + 
entry)/ L_power norm. <p>
+   *
+   * @param power The power to use. Must be > 1. Cannot be {@link 
Double#POSITIVE_INFINITY}.
+   * @return a new Vector
+   */
+  Vector logNormalize(double power);
+
+  /**
+   * Return the k-norm of the vector. <p/> See 
http://en.wikipedia.org/wiki/Lp_space <p>
+   * Technically, when {@code 0 > power < 1}, we don't have a norm, just a 
metric, but we'll overload this here. Also supports power == 0 (number of
+   * non-zero elements) and power = {@link Double#POSITIVE_INFINITY} (max 
element). Again, see the Wikipedia page for
+   * more info.
+   *
+   * @param power The power to use.
+   * @see #normalize(double)
+   */
+  double norm(double power);
+
+  /** @return The minimum value in the Vector */
+  double minValue();
+
+  /** @return The index of the minimum value */
+  int minValueIndex();
+
+  /** @return The maximum value in the Vector */
+  double maxValue();
+
+  /** @return The index of the maximum value */
+  int maxValueIndex();
+
+  /**
+   * Return a new vector containing the sum of each value of the recipient and 
the argument
+   *
+   * @param x a double
+   * @return a new Vector
+   */
+  Vector plus(double x);
+
+  /**
+   * Return a new vector containing the element by element sum of the 
recipient and the argument
+   *
+   * @param x a Vector
+   * @return a new Vector
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Vector plus(Vector x);
+
+  /**
+   * Set the value at the given index
+   *
+   * @param index an int index into the receiver
+   * @param value a double value to set
+   * @throws IndexException if the index is out of bounds
+   */
+  void set(int index, double value);
+
+  /**
+   * Set the value at the given index, without checking bounds
+   *
+   * @param index an int index into the receiver
+   * @param value a double value to set
+   */
+  void setQuick(int index, double value);
+
+  /**
+   * Increment the value at the given index by the given value.
+   *
+   * @param index an int index into the receiver
+   * @param increment sets the value at the given index to value + increment;
+   */
+  void incrementQuick(int index, double increment);
+
+  /**
+   * Return the number of values in the recipient which are not the default 
value.  For instance, for a
+   * sparse vector, this would be the number of non-zero values.
+   *
+   * @return an int
+   */
+  int getNumNondefaultElements();
+
+  /**
+   * Return the number of non zero elements in the vector.
+   *
+   * @return an int
+   */
+  int getNumNonZeroElements();
+
+  /**
+   * Return a new vector containing the product of each value of the recipient 
and the argument
+   *
+   * @param x a double argument
+   * @return a new Vector
+   */
+  Vector times(double x);
+
+  /**
+   * Return a new vector containing the element-wise product of the recipient 
and the argument
+   *
+   * @param x a Vector argument
+   * @return a new Vector
+   * @throws CardinalityException if the cardinalities differ
+   */
+  Vector times(Vector x);
+
+  /**
+   * Return a new vector containing the subset of the recipient
+   *
+   * @param offset an int offset into the receiver
+   * @param length the cardinality of the desired result
+   * @return a new Vector
+   * @throws CardinalityException if the length is greater than the 
cardinality of the receiver
+   * @throws IndexException       if the offset is negative or the 
offset+length is outside of the receiver
+   */
+  Vector viewPart(int offset, int length);
+
+  /**
+   * Return the sum of all the elements of the receiver
+   *
+   * @return a double
+   */
+  double zSum();
+
+  /**
+   * Return the cross product of the receiver and the other vector
+   *
+   * @param other another Vector
+   * @return a Matrix
+   */
+  Matrix cross(Vector other);
+
+  /*
+   * Need stories for these but keeping them here for now.
+   */
+  // void getNonZeros(IntArrayList jx, DoubleArrayList values);
+  // void foreachNonZero(IntDoubleFunction f);
+  // DoubleDoubleFunction map);
+  // NewVector assign(Vector y, DoubleDoubleFunction function, IntArrayList
+  // nonZeroIndexes);
+
+  /**
+   * Examples speak louder than words:  aggregate(plus, pow(2)) is another way 
to say
+   * getLengthSquared(), aggregate(max, abs) is 
norm(Double.POSITIVE_INFINITY).  To sum all of the positive values,
+   * aggregate(plus, max(0)).
+   * @param aggregator used to combine the current value of the aggregation 
with the result of map.apply(nextValue)
+   * @param map a function to apply to each element of the vector in turn 
before passing to the aggregator
+   * @return the final aggregation
+   */
+  double aggregate(DoubleDoubleFunction aggregator, DoubleFunction map);
+
+  /**
+   * <p>Generalized inner product - take two vectors, iterate over them both, 
using the combiner to combine together
+   * (and possibly map in some way) each pair of values, which are then 
aggregated with the previous accumulated
+   * value in the combiner.</p>
+   * <p>
+   * Example: dot(other) could be expressed as aggregate(other, Plus, Times), 
and kernelized inner products (which
+   * are symmetric on the indices) work similarly.
+   * @param other a vector to aggregate in combination with
+   * @param aggregator function we're aggregating with; fa
+   * @param combiner function we're combining with; fc
+   * @return the final aggregation; {@code if r0 = fc(this[0], other[0]), ri = 
fa(r_{i-1}, fc(this[i], other[i]))
+   * for all i > 0}
+   */
+  double aggregate(Vector other, DoubleDoubleFunction aggregator, 
DoubleDoubleFunction combiner);
+
+  /**
+   * Return the sum of squares of all elements in the vector. Square root of
+   * this value is the length of the vector.
+   */
+  double getLengthSquared();
+
+  /**
+   * Get the square of the distance between this vector and the other vector.
+   */
+  double getDistanceSquared(Vector v);
+
+  /**
+   * Gets an estimate of the cost (in number of operations) it takes to lookup 
a random element in this vector.
+   */
+  double getLookupCost();
+
+  /**
+   * Gets an estimate of the cost (in number of operations) it takes to 
advance an iterator through the nonzero
+   * elements of this vector.
+   */
+  double getIteratorAdvanceCost();
+
+  /**
+   * Return true iff adding a new (nonzero) element takes constant time for 
this vector.
+   */
+  boolean isAddConstantTime();
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java
----------------------------------------------------------------------
diff --git 
a/core/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java 
b/core/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java
new file mode 100644
index 0000000..4d3a80f
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java
@@ -0,0 +1,481 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import org.apache.mahout.math.function.DoubleDoubleFunction;
+import org.apache.mahout.math.set.OpenIntHashSet;
+
+import java.util.Iterator;
+
+/**
+ * Abstract class encapsulating different algorithms that perform the Vector 
operations aggregate().
+ * x.aggregte(y, fa, fc), for x and y Vectors and fa, fc DoubleDouble 
functions:
+ * - applies the function fc to every element in x and y, fc(xi, yi)
+ * - constructs a result iteratively, r0 = fc(x0, y0), ri = fc(r_{i-1}, fc(xi, 
yi)).
+ * This works essentially like a map/reduce functional combo.
+ *
+ * The names of variables, methods and classes used here follow the following 
conventions:
+ * The vector being assigned to (the left hand side) is called this or x.
+ * The right hand side is called that or y.
+ * The aggregating (reducing) function to be applied is called fa.
+ * The combining (mapping) function to be applied is called fc.
+ *
+ * The different algorithms take into account the different characteristics of 
vector classes:
+ * - whether the vectors support sequential iteration (isSequential())
+ * - what the lookup cost is (getLookupCost())
+ * - what the iterator advancement cost is (getIteratorAdvanceCost())
+ *
+ * The names of the actual classes (they're nested in VectorBinaryAssign) 
describe the used for assignment.
+ * The most important optimization is iterating just through the nonzeros 
(only possible if f(0, 0) = 0).
+ * There are 4 main possibilities:
+ * - iterating through the nonzeros of just one vector and looking up the 
corresponding elements in the other
+ * - iterating through the intersection of nonzeros (those indices where both 
vectors have nonzero values)
+ * - iterating through the union of nonzeros (those indices where at least one 
of the vectors has a nonzero value)
+ * - iterating through all the elements in some way (either through both at 
the same time, both one after the other,
+ *   looking up both, looking up just one).
+ *
+ * The internal details are not important and a particular algorithm should 
generally not be called explicitly.
+ * The best one will be selected through assignBest(), which is itself called 
through Vector.assign().
+ *
+ * See 
https://docs.google.com/document/d/1g1PjUuvjyh2LBdq2_rKLIcUiDbeOORA1sCJiSsz-JVU/edit#
 for a more detailed
+ * explanation.
+ */
+public abstract class VectorBinaryAggregate {
+  public static final VectorBinaryAggregate[] OPERATIONS = {
+    new AggregateNonzerosIterateThisLookupThat(),
+    new AggregateNonzerosIterateThatLookupThis(),
+
+    new AggregateIterateIntersection(),
+
+    new AggregateIterateUnionSequential(),
+    new AggregateIterateUnionRandom(),
+
+    new AggregateAllIterateSequential(),
+    new AggregateAllIterateThisLookupThat(),
+    new AggregateAllIterateThatLookupThis(),
+    new AggregateAllLoop(),
+  };
+
+  /**
+   * Returns true iff we can use this algorithm to apply fc to x and y 
component-wise and aggregate the result using fa.
+   */
+  public abstract boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc);
+
+  /**
+   * Estimates the cost of using this algorithm to compute the aggregation. 
The algorithm is assumed to be valid.
+   */
+  public abstract double estimateCost(Vector x, Vector y, DoubleDoubleFunction 
fa, DoubleDoubleFunction fc);
+
+  /**
+   * Main method that applies fc to x and y component-wise aggregating the 
results with fa. It returns the result of
+   * the aggregation.
+   */
+  public abstract double aggregate(Vector x, Vector y, DoubleDoubleFunction 
fa, DoubleDoubleFunction fc);
+
+  /**
+   * The best operation is the least expensive valid one.
+   */
+  public static VectorBinaryAggregate getBestOperation(Vector x, Vector y, 
DoubleDoubleFunction fa,
+                                                       DoubleDoubleFunction 
fc) {
+    int bestOperationIndex = -1;
+    double bestCost = Double.POSITIVE_INFINITY;
+    for (int i = 0; i < OPERATIONS.length; ++i) {
+      if (OPERATIONS[i].isValid(x, y, fa, fc)) {
+        double cost = OPERATIONS[i].estimateCost(x, y, fa, fc);
+        if (cost < bestCost) {
+          bestCost = cost;
+          bestOperationIndex = i;
+        }
+      }
+    }
+    return OPERATIONS[bestOperationIndex];
+  }
+
+  /**
+   * This is the method that should be used when aggregating. It selects the 
best algorithm and applies it.
+   */
+  public static double aggregateBest(Vector x, Vector y, DoubleDoubleFunction 
fa, DoubleDoubleFunction fc) {
+    return getBestOperation(x, y, fa, fc).aggregate(x, y, fa, fc);
+  }
+
+  public static class AggregateNonzerosIterateThisLookupThat extends 
VectorBinaryAggregate {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return fa.isLikeRightPlus() && (fa.isAssociativeAndCommutative() || 
x.isSequentialAccess())
+          && fc.isLikeLeftMult();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * 
y.getLookupCost();
+    }
+
+    @Override
+    public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
+      if (!xi.hasNext()) {
+        return 0;
+      }
+      Vector.Element xe = xi.next();
+      double result = fc.apply(xe.get(), y.getQuick(xe.index()));
+      while (xi.hasNext()) {
+        xe = xi.next();
+        result = fa.apply(result, fc.apply(xe.get(), y.getQuick(xe.index())));
+      }
+      return result;
+    }
+  }
+
+  public static class AggregateNonzerosIterateThatLookupThis extends 
VectorBinaryAggregate {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return fa.isLikeRightPlus() && (fa.isAssociativeAndCommutative() || 
y.isSequentialAccess())
+          && fc.isLikeRightMult();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * 
x.getLookupCost() * x.getLookupCost();
+    }
+
+    @Override
+    public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
+      if (!yi.hasNext()) {
+        return 0;
+      }
+      Vector.Element ye = yi.next();
+      double result = fc.apply(x.getQuick(ye.index()), ye.get());
+      while (yi.hasNext()) {
+        ye = yi.next();
+        result = fa.apply(result, fc.apply(x.getQuick(ye.index()), ye.get()));
+      }
+      return result;
+    }
+  }
+
+  public static class AggregateIterateIntersection extends 
VectorBinaryAggregate {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return fa.isLikeRightPlus() && fc.isLikeMult() && x.isSequentialAccess() 
&& y.isSequentialAccess();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return Math.min(x.getNumNondefaultElements() * 
x.getIteratorAdvanceCost(),
+          y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
+    }
+
+    @Override
+    public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
+      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
+      Vector.Element xe = null;
+      Vector.Element ye = null;
+      boolean advanceThis = true;
+      boolean advanceThat = true;
+      boolean validResult = false;
+      double result = 0;
+      while (true) {
+        if (advanceThis) {
+          if (xi.hasNext()) {
+            xe = xi.next();
+          } else {
+            break;
+          }
+        }
+        if (advanceThat) {
+          if (yi.hasNext()) {
+            ye = yi.next();
+          } else {
+            break;
+          }
+        }
+        if (xe.index() == ye.index()) {
+          double thisResult = fc.apply(xe.get(), ye.get());
+          if (validResult) {
+            result = fa.apply(result, thisResult);
+          } else {
+            result = thisResult;
+            validResult = true;
+          }
+          advanceThis = true;
+          advanceThat = true;
+        } else {
+          if (xe.index() < ye.index()) { // f(x, 0) = 0
+            advanceThis = true;
+            advanceThat = false;
+          } else { // f(0, y) = 0
+            advanceThis = false;
+            advanceThat = true;
+          }
+        }
+      }
+      return result;
+    }
+  }
+
+  public static class AggregateIterateUnionSequential extends 
VectorBinaryAggregate {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return fa.isLikeRightPlus() && !fc.isDensifying()
+          && x.isSequentialAccess() && y.isSequentialAccess();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return Math.max(x.getNumNondefaultElements() * 
x.getIteratorAdvanceCost(),
+          y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
+    }
+
+    @Override
+    public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
+      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
+      Vector.Element xe = null;
+      Vector.Element ye = null;
+      boolean advanceThis = true;
+      boolean advanceThat = true;
+      boolean validResult = false;
+      double result = 0;
+      while (true) {
+        if (advanceThis) {
+          if (xi.hasNext()) {
+            xe = xi.next();
+          } else {
+            xe = null;
+          }
+        }
+        if (advanceThat) {
+          if (yi.hasNext()) {
+            ye = yi.next();
+          } else {
+            ye = null;
+          }
+        }
+        double thisResult;
+        if (xe != null && ye != null) { // both vectors have nonzero elements
+          if (xe.index() == ye.index()) {
+            thisResult = fc.apply(xe.get(), ye.get());
+            advanceThis = true;
+            advanceThat = true;
+          } else {
+            if (xe.index() < ye.index()) { // f(x, 0)
+              thisResult = fc.apply(xe.get(), 0);
+              advanceThis = true;
+              advanceThat = false;
+            } else {
+              thisResult = fc.apply(0, ye.get());
+              advanceThis = false;
+              advanceThat = true;
+            }
+          }
+        } else if (xe != null) { // just the first one still has nonzeros
+          thisResult = fc.apply(xe.get(), 0);
+          advanceThis = true;
+          advanceThat = false;
+        } else if (ye != null) { // just the second one has nonzeros
+          thisResult = fc.apply(0, ye.get());
+          advanceThis = false;
+          advanceThat = true;
+        } else { // we're done, both are empty
+          break;
+        }
+        if (validResult) {
+          result = fa.apply(result, thisResult);
+        } else {
+          result = thisResult;
+          validResult =  true;
+        }
+      }
+      return result;
+    }
+  }
+
+  public static class AggregateIterateUnionRandom extends 
VectorBinaryAggregate {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return fa.isLikeRightPlus() && !fc.isDensifying()
+          && (fa.isAssociativeAndCommutative() || (x.isSequentialAccess() && 
y.isSequentialAccess()));
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return Math.max(x.getNumNondefaultElements() * 
x.getIteratorAdvanceCost() * y.getLookupCost(),
+          y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * 
x.getLookupCost());
+    }
+
+    @Override
+    public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      OpenIntHashSet visited = new OpenIntHashSet();
+      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
+      boolean validResult = false;
+      double result = 0;
+      double thisResult;
+      while (xi.hasNext()) {
+        Vector.Element xe = xi.next();
+        thisResult = fc.apply(xe.get(), y.getQuick(xe.index()));
+        if (validResult) {
+          result = fa.apply(result, thisResult);
+        } else {
+          result = thisResult;
+          validResult = true;
+        }
+        visited.add(xe.index());
+      }
+      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
+      while (yi.hasNext()) {
+        Vector.Element ye = yi.next();
+        if (!visited.contains(ye.index())) {
+          thisResult = fc.apply(x.getQuick(ye.index()), ye.get());
+          if (validResult) {
+            result = fa.apply(result, thisResult);
+          } else {
+            result = thisResult;
+            validResult = true;
+          }
+        }
+      }
+      return result;
+    }
+  }
+
+  public static class AggregateAllIterateSequential extends 
VectorBinaryAggregate {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return x.isSequentialAccess() && y.isSequentialAccess() && !x.isDense() 
&& !y.isDense();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * 
y.getIteratorAdvanceCost());
+    }
+
+    @Override
+    public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      Iterator<Vector.Element> xi = x.all().iterator();
+      Iterator<Vector.Element> yi = y.all().iterator();
+      boolean validResult = false;
+      double result = 0;
+      while (xi.hasNext() && yi.hasNext()) {
+        Vector.Element xe = xi.next();
+        double thisResult = fc.apply(xe.get(), yi.next().get());
+        if (validResult) {
+          result = fa.apply(result, thisResult);
+        } else {
+          result = thisResult;
+          validResult = true;
+        }
+      }
+      return result;
+    }
+  }
+
+  public static class AggregateAllIterateThisLookupThat extends 
VectorBinaryAggregate {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return (fa.isAssociativeAndCommutative() || x.isSequentialAccess())
+          && !x.isDense();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return x.size() * x.getIteratorAdvanceCost() * y.getLookupCost();
+    }
+
+    @Override
+    public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      Iterator<Vector.Element> xi = x.all().iterator();
+      boolean validResult = false;
+      double result = 0;
+      while (xi.hasNext()) {
+        Vector.Element xe = xi.next();
+        double thisResult = fc.apply(xe.get(), y.getQuick(xe.index()));
+        if (validResult) {
+          result = fa.apply(result, thisResult);
+        } else {
+          result = thisResult;
+          validResult = true;
+        }
+      }
+      return result;
+    }
+  }
+
+  public static class AggregateAllIterateThatLookupThis extends 
VectorBinaryAggregate {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return (fa.isAssociativeAndCommutative() || y.isSequentialAccess())
+          && !y.isDense();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return y.size() * y.getIteratorAdvanceCost() * x.getLookupCost();
+    }
+
+    @Override
+    public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      Iterator<Vector.Element> yi = y.all().iterator();
+      boolean validResult = false;
+      double result = 0;
+      while (yi.hasNext()) {
+        Vector.Element ye = yi.next();
+        double thisResult = fc.apply(x.getQuick(ye.index()), ye.get());
+        if (validResult) {
+          result = fa.apply(result, thisResult);
+        } else {
+          result = thisResult;
+          validResult = true;
+        }
+      }
+      return result;
+    }
+  }
+
+  public static class AggregateAllLoop extends VectorBinaryAggregate {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return true;
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      return x.size() * x.getLookupCost() * y.getLookupCost();
+    }
+
+    @Override
+    public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, 
DoubleDoubleFunction fc) {
+      double result = fc.apply(x.getQuick(0), y.getQuick(0));
+      int s = x.size();
+      for (int i = 1; i < s; ++i) {
+        result = fa.apply(result, fc.apply(x.getQuick(i), y.getQuick(i)));
+      }
+      return result;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java 
b/core/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java
new file mode 100644
index 0000000..f24d552
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java
@@ -0,0 +1,667 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import org.apache.mahout.math.Vector.Element;
+import org.apache.mahout.math.function.DoubleDoubleFunction;
+import org.apache.mahout.math.set.OpenIntHashSet;
+
+import java.util.Iterator;
+
+/**
+ * Abstract class encapsulating different algorithms that perform the Vector 
operations assign().
+ * x.assign(y, f), for x and y Vectors and f a DoubleDouble function:
+ * - applies the function f to every element in x and y, f(xi, yi)
+ * - assigns xi = f(xi, yi) for all indices i
+ *
+ * The names of variables, methods and classes used here follow the following 
conventions:
+ * The vector being assigned to (the left hand side) is called this or x.
+ * The right hand side is called that or y.
+ * The function to be applied is called f.
+ *
+ * The different algorithms take into account the different characteristics of 
vector classes:
+ * - whether the vectors support sequential iteration (isSequential())
+ * - whether the vectors support constant-time additions (isAddConstantTime())
+ * - what the lookup cost is (getLookupCost())
+ * - what the iterator advancement cost is (getIteratorAdvanceCost())
+ *
+ * The names of the actual classes (they're nested in VectorBinaryAssign) 
describe the used for assignment.
+ * The most important optimization is iterating just through the nonzeros 
(only possible if f(0, 0) = 0).
+ * There are 4 main possibilities:
+ * - iterating through the nonzeros of just one vector and looking up the 
corresponding elements in the other
+ * - iterating through the intersection of nonzeros (those indices where both 
vectors have nonzero values)
+ * - iterating through the union of nonzeros (those indices where at least one 
of the vectors has a nonzero value)
+ * - iterating through all the elements in some way (either through both at 
the same time, both one after the other,
+ *   looking up both, looking up just one).
+ * Then, there are two additional sub-possibilities:
+ * - if a new value can be added to x in constant time (isAddConstantTime()), 
the *Inplace updates are used
+ * - otherwise (really just for SequentialAccessSparseVectors right now), the 
*Merge updates are used, where
+ *   a sorted list of (index, value) pairs is merged into the vector at the 
end.
+ *
+ * The internal details are not important and a particular algorithm should 
generally not be called explicitly.
+ * The best one will be selected through assignBest(), which is itself called 
through Vector.assign().
+ *
+ * See 
https://docs.google.com/document/d/1g1PjUuvjyh2LBdq2_rKLIcUiDbeOORA1sCJiSsz-JVU/edit#
 for a more detailed
+ * explanation.
+ */
+public abstract class VectorBinaryAssign {
+  public static final VectorBinaryAssign[] OPERATIONS = {
+    new AssignNonzerosIterateThisLookupThat(),
+    new AssignNonzerosIterateThatLookupThisMergeUpdates(),
+    new AssignNonzerosIterateThatLookupThisInplaceUpdates(),
+
+    new AssignIterateIntersection(),
+
+    new AssignIterateUnionSequentialMergeUpdates(),
+    new AssignIterateUnionSequentialInplaceUpdates(),
+    new AssignIterateUnionRandomMergeUpdates(),
+    new AssignIterateUnionRandomInplaceUpdates(),
+
+    new AssignAllIterateSequentialMergeUpdates(),
+    new AssignAllIterateSequentialInplaceUpdates(),
+    new AssignAllIterateThisLookupThatMergeUpdates(),
+    new AssignAllIterateThisLookupThatInplaceUpdates(),
+    new AssignAllIterateThatLookupThisMergeUpdates(),
+    new AssignAllIterateThatLookupThisInplaceUpdates(),
+    new AssignAllLoopMergeUpdates(),
+    new AssignAllLoopInplaceUpdates(),
+  };
+
+  /**
+   * Returns true iff we can use this algorithm to apply f to x and y 
component-wise and assign the result to x.
+   */
+  public abstract boolean isValid(Vector x, Vector y, DoubleDoubleFunction f);
+
+  /**
+   * Estimates the cost of using this algorithm to compute the assignment. The 
algorithm is assumed to be valid.
+   */
+  public abstract double estimateCost(Vector x, Vector y, DoubleDoubleFunction 
f);
+
+  /**
+   * Main method that applies f to x and y component-wise assigning the 
results to x. It returns the modified vector,
+   * x.
+   */
+  public abstract Vector assign(Vector x, Vector y, DoubleDoubleFunction f);
+
+  /**
+   * The best operation is the least expensive valid one.
+   */
+  public static VectorBinaryAssign getBestOperation(Vector x, Vector y, 
DoubleDoubleFunction f) {
+    int bestOperationIndex = -1;
+    double bestCost = Double.POSITIVE_INFINITY;
+    for (int i = 0; i < OPERATIONS.length; ++i) {
+      if (OPERATIONS[i].isValid(x, y, f)) {
+        double cost = OPERATIONS[i].estimateCost(x, y, f);
+        if (cost < bestCost) {
+          bestCost = cost;
+          bestOperationIndex = i;
+        }
+      }
+    }
+    return OPERATIONS[bestOperationIndex];
+  }
+
+  /**
+   * This is the method that should be used when assigning. It selects the 
best algorithm and applies it.
+   * Note that it does NOT invalidate the cached length of the Vector and 
should only be used through the wrapprs
+   * in AbstractVector.
+   */
+  public static Vector assignBest(Vector x, Vector y, DoubleDoubleFunction f) {
+    return getBestOperation(x, y, f).assign(x, y, f);
+  }
+
+  /**
+   * If f(0, y) = 0, the zeros in x don't matter and we can simply iterate 
through the nonzeros of x.
+   * To get the corresponding element of y, we perform a lookup.
+   * There are no *Merge or *Inplace versions because in this case x cannot 
become more dense because of f, meaning
+   * all changes will occur at indices whose values are already nonzero.
+   */
+  public static class AssignNonzerosIterateThisLookupThat extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return f.isLikeLeftMult();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * 
y.getLookupCost();
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      for (Element xe : x.nonZeroes()) {
+        xe.set(f.apply(xe.get(), y.getQuick(xe.index())));
+      }
+      return x;
+    }
+  }
+
+  /**
+   * If f(x, 0) = x, the zeros in y don't matter and we can simply iterate 
through the nonzeros of y.
+   * We get the corresponding element of x through a lookup and update x 
inplace.
+   */
+  public static class AssignNonzerosIterateThatLookupThisInplaceUpdates 
extends VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return f.isLikeRightPlus();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * 
x.getLookupCost() * x.getLookupCost();
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      for (Element ye : y.nonZeroes()) {
+        x.setQuick(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
+      }
+      return x;
+    }
+  }
+
+  /**
+   * If f(x, 0) = x, the zeros in y don't matter and we can simply iterate 
through the nonzeros of y.
+   * We get the corresponding element of x through a lookup and update x by 
merging.
+   */
+  public static class AssignNonzerosIterateThatLookupThisMergeUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return f.isLikeRightPlus() && y.isSequentialAccess() && 
!x.isAddConstantTime();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * 
y.getLookupCost();
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
+      for (Element ye : y.nonZeroes()) {
+        updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
+      }
+      x.mergeUpdates(updates);
+      return x;
+    }
+  }
+
+  /**
+   * If f(x, 0) = x and f(0, y) = 0 the zeros in x and y don't matter and we 
can iterate through the nonzeros
+   * in both x and y.
+   * This is only possible if both x and y support sequential access.
+   */
+  public static class AssignIterateIntersection extends VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return f.isLikeLeftMult() && f.isLikeRightPlus() && 
x.isSequentialAccess() && y.isSequentialAccess();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return Math.min(x.getNumNondefaultElements() * 
x.getIteratorAdvanceCost(),
+          y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
+      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
+      Vector.Element xe = null;
+      Vector.Element ye = null;
+      boolean advanceThis = true;
+      boolean advanceThat = true;
+      while (true) {
+        if (advanceThis) {
+          if (xi.hasNext()) {
+            xe = xi.next();
+          } else {
+            break;
+          }
+        }
+        if (advanceThat) {
+          if (yi.hasNext()) {
+            ye = yi.next();
+          } else {
+            break;
+          }
+        }
+        if (xe.index() == ye.index()) {
+          xe.set(f.apply(xe.get(), ye.get()));
+          advanceThis = true;
+          advanceThat = true;
+        } else {
+          if (xe.index() < ye.index()) { // f(x, 0) = 0
+            advanceThis = true;
+            advanceThat = false;
+          } else { // f(0, y) = 0
+            advanceThis = false;
+            advanceThat = true;
+          }
+        }
+      }
+      return x;
+    }
+  }
+
+  /**
+   * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
+   * In this case we iterate through them in parallel and update x by merging. 
Because we're iterating through
+   * both vectors at the same time, x and y need to support sequential access.
+   */
+  public static class AssignIterateUnionSequentialMergeUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return !f.isDensifying() && x.isSequentialAccess() && 
y.isSequentialAccess() && !x.isAddConstantTime();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return Math.max(x.getNumNondefaultElements() * 
x.getIteratorAdvanceCost(),
+          y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
+      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
+      Vector.Element xe = null;
+      Vector.Element ye = null;
+      boolean advanceThis = true;
+      boolean advanceThat = true;
+      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
+      while (true) {
+        if (advanceThis) {
+          if (xi.hasNext()) {
+            xe = xi.next();
+          } else {
+            xe = null;
+          }
+        }
+        if (advanceThat) {
+          if (yi.hasNext()) {
+            ye = yi.next();
+          } else {
+            ye = null;
+          }
+        }
+        if (xe != null && ye != null) { // both vectors have nonzero elements
+          if (xe.index() == ye.index()) {
+            xe.set(f.apply(xe.get(), ye.get()));
+            advanceThis = true;
+            advanceThat = true;
+          } else {
+            if (xe.index() < ye.index()) { // f(x, 0)
+              xe.set(f.apply(xe.get(), 0));
+              advanceThis = true;
+              advanceThat = false;
+            } else {
+              updates.set(ye.index(), f.apply(0, ye.get()));
+              advanceThis = false;
+              advanceThat = true;
+            }
+          }
+        } else if (xe != null) { // just the first one still has nonzeros
+          xe.set(f.apply(xe.get(), 0));
+          advanceThis = true;
+          advanceThat = false;
+        } else if (ye != null) { // just the second one has nonzeros
+          updates.set(ye.index(), f.apply(0, ye.get()));
+          advanceThis = false;
+          advanceThat = true;
+        } else { // we're done, both are empty
+          break;
+        }
+      }
+      x.mergeUpdates(updates);
+      return x;
+    }
+  }
+
+  /**
+   * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
+   * In this case we iterate through them in parallel and update x inplace. 
Because we're iterating through
+   * both vectors at the same time, x and y need to support sequential access.
+   */
+  public static class AssignIterateUnionSequentialInplaceUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return !f.isDensifying() && x.isSequentialAccess() && 
y.isSequentialAccess() && x.isAddConstantTime();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return Math.max(x.getNumNondefaultElements() * 
x.getIteratorAdvanceCost(),
+          y.getNumNondefaultElements() * y.getIteratorAdvanceCost());
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      Iterator<Vector.Element> xi = x.nonZeroes().iterator();
+      Iterator<Vector.Element> yi = y.nonZeroes().iterator();
+      Vector.Element xe = null;
+      Vector.Element ye = null;
+      boolean advanceThis = true;
+      boolean advanceThat = true;
+      while (true) {
+        if (advanceThis) {
+          if (xi.hasNext()) {
+            xe = xi.next();
+          } else {
+            xe = null;
+          }
+        }
+        if (advanceThat) {
+          if (yi.hasNext()) {
+            ye = yi.next();
+          } else {
+            ye = null;
+          }
+        }
+        if (xe != null && ye != null) { // both vectors have nonzero elements
+          if (xe.index() == ye.index()) {
+            xe.set(f.apply(xe.get(), ye.get()));
+            advanceThis = true;
+            advanceThat = true;
+          } else {
+            if (xe.index() < ye.index()) { // f(x, 0)
+              xe.set(f.apply(xe.get(), 0));
+              advanceThis = true;
+              advanceThat = false;
+            } else {
+              x.setQuick(ye.index(), f.apply(0, ye.get()));
+              advanceThis = false;
+              advanceThat = true;
+            }
+          }
+        } else if (xe != null) { // just the first one still has nonzeros
+          xe.set(f.apply(xe.get(), 0));
+          advanceThis = true;
+          advanceThat = false;
+        } else if (ye != null) { // just the second one has nonzeros
+          x.setQuick(ye.index(), f.apply(0, ye.get()));
+          advanceThis = false;
+          advanceThat = true;
+        } else { // we're done, both are empty
+          break;
+        }
+      }
+      return x;
+    }
+  }
+
+  /**
+   * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
+   * In this case, we iterate through the nozeros of x and y alternatively 
(this works even when one of them
+   * doesn't support sequential access). Since we're merging the results into 
x, when iterating through y, the
+   * order of iteration matters and y must support sequential access.
+   */
+  public static class AssignIterateUnionRandomMergeUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return !f.isDensifying() && !x.isAddConstantTime() && 
y.isSequentialAccess();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return Math.max(x.getNumNondefaultElements() * 
x.getIteratorAdvanceCost() * y.getLookupCost(),
+          y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * 
x.getLookupCost());
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      OpenIntHashSet visited = new OpenIntHashSet();
+      for (Element xe : x.nonZeroes()) {
+        xe.set(f.apply(xe.get(), y.getQuick(xe.index())));
+        visited.add(xe.index());
+      }
+      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
+      for (Element ye : y.nonZeroes()) {
+        if (!visited.contains(ye.index())) {
+          updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
+        }
+      }
+      x.mergeUpdates(updates);
+      return x;
+    }
+  }
+
+  /**
+   * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y.
+   * In this case, we iterate through the nozeros of x and y alternatively 
(this works even when one of them
+   * doesn't support sequential access). Because updates to x are inplace, 
neither x, nor y need to support
+   * sequential access.
+   */
+  public static class AssignIterateUnionRandomInplaceUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return !f.isDensifying() && x.isAddConstantTime();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return Math.max(x.getNumNondefaultElements() * 
x.getIteratorAdvanceCost() * y.getLookupCost(),
+          y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * 
x.getLookupCost());
+    }
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      OpenIntHashSet visited = new OpenIntHashSet();
+      for (Element xe : x.nonZeroes()) {
+        xe.set(f.apply(xe.get(), y.getQuick(xe.index())));
+        visited.add(xe.index());
+      }
+      for (Element ye : y.nonZeroes()) {
+        if (!visited.contains(ye.index())) {
+          x.setQuick(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
+        }
+      }
+      return x;
+    }
+  }
+
+  public static class AssignAllIterateSequentialMergeUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.isSequentialAccess() && y.isSequentialAccess() && 
!x.isAddConstantTime() && !x.isDense() && !y.isDense();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * 
y.getIteratorAdvanceCost());
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      Iterator<Vector.Element> xi = x.all().iterator();
+      Iterator<Vector.Element> yi = y.all().iterator();
+      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
+      while (xi.hasNext() && yi.hasNext()) {
+        Element xe = xi.next();
+        updates.set(xe.index(), f.apply(xe.get(), yi.next().get()));
+      }
+      x.mergeUpdates(updates);
+      return x;
+    }
+  }
+
+  public static class AssignAllIterateSequentialInplaceUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.isSequentialAccess() && y.isSequentialAccess() && 
x.isAddConstantTime()
+          && !x.isDense() && !y.isDense();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * 
y.getIteratorAdvanceCost());
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      Iterator<Vector.Element> xi = x.all().iterator();
+      Iterator<Vector.Element> yi = y.all().iterator();
+      while (xi.hasNext() && yi.hasNext()) {
+        Element xe = xi.next();
+        x.setQuick(xe.index(), f.apply(xe.get(), yi.next().get()));
+      }
+      return x;
+    }
+  }
+
+  public static class AssignAllIterateThisLookupThatMergeUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return !x.isAddConstantTime() && !x.isDense();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.size() * x.getIteratorAdvanceCost() * y.getLookupCost();
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
+      for (Element xe : x.all()) {
+        updates.set(xe.index(), f.apply(xe.get(), y.getQuick(xe.index())));
+      }
+      x.mergeUpdates(updates);
+      return x;
+    }
+  }
+
+  public static class AssignAllIterateThisLookupThatInplaceUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.isAddConstantTime() && !x.isDense();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.size() * x.getIteratorAdvanceCost() * y.getLookupCost();
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      for (Element xe : x.all()) {
+        x.setQuick(xe.index(), f.apply(xe.get(), y.getQuick(xe.index())));
+      }
+      return x;
+    }
+  }
+
+  public static class AssignAllIterateThatLookupThisMergeUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return !x.isAddConstantTime() && !y.isDense();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return y.size() * y.getIteratorAdvanceCost() * x.getLookupCost();
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
+      for (Element ye : y.all()) {
+        updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
+      }
+      x.mergeUpdates(updates);
+      return x;
+    }
+  }
+
+  public static class AssignAllIterateThatLookupThisInplaceUpdates extends 
VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.isAddConstantTime() && !y.isDense();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return y.size() * y.getIteratorAdvanceCost() * x.getLookupCost();
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      for (Element ye : y.all()) {
+        x.setQuick(ye.index(), f.apply(x.getQuick(ye.index()), ye.get()));
+      }
+      return x;
+    }
+  }
+
+  public static class AssignAllLoopMergeUpdates extends VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return !x.isAddConstantTime();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.size() * x.getLookupCost() * y.getLookupCost();
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false);
+      for (int i = 0; i < x.size(); ++i) {
+        updates.set(i, f.apply(x.getQuick(i), y.getQuick(i)));
+      }
+      x.mergeUpdates(updates);
+      return x;
+    }
+  }
+
+  public static class AssignAllLoopInplaceUpdates extends VectorBinaryAssign {
+
+    @Override
+    public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.isAddConstantTime();
+    }
+
+    @Override
+    public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) {
+      return x.size() * x.getLookupCost() * y.getLookupCost();
+    }
+
+    @Override
+    public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) {
+      for (int i = 0; i < x.size(); ++i) {
+        x.setQuick(i, f.apply(x.getQuick(i), y.getQuick(i)));
+      }
+      return x;
+    }
+  }
+}

http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/VectorIterable.java
----------------------------------------------------------------------
diff --git a/core/src/main/java/org/apache/mahout/math/VectorIterable.java 
b/core/src/main/java/org/apache/mahout/math/VectorIterable.java
new file mode 100644
index 0000000..8414fdb
--- /dev/null
+++ b/core/src/main/java/org/apache/mahout/math/VectorIterable.java
@@ -0,0 +1,56 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements.  See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License.  You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.mahout.math;
+
+import java.util.Iterator;
+
+public interface VectorIterable extends Iterable<MatrixSlice> {
+
+  /* Iterate all rows in order */
+  Iterator<MatrixSlice> iterateAll();
+
+  /* Iterate all non empty rows in arbitrary order */
+  Iterator<MatrixSlice> iterateNonEmpty();
+
+  int numSlices();
+
+  int numRows();
+
+  int numCols();
+
+  /**
+   * Return a new vector with cardinality equal to getNumRows() of this matrix 
which is the matrix product of the
+   * recipient and the argument
+   *
+   * @param v a vector with cardinality equal to getNumCols() of the recipient
+   * @return a new vector (typically a DenseVector)
+   * @throws CardinalityException if this.getNumRows() != v.size()
+   */
+  Vector times(Vector v);
+
+  /**
+   * Convenience method for producing this.transpose().times(this.times(v)), 
which can be implemented with only one pass
+   * over the matrix, without making the transpose() call (which can be 
expensive if the matrix is sparse)
+   *
+   * @param v a vector with cardinality equal to getNumCols() of the recipient
+   * @return a new vector (typically a DenseVector) with cardinality equal to 
that of the argument.
+   * @throws CardinalityException if this.getNumCols() != v.size()
+   */
+  Vector timesSquared(Vector v);
+
+}

[09/15] mahout git commit: NO-JIRA Trevors updates

Reply via email to