http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math/src/main/java/org/apache/mahout/math/SparseRowMatrix.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/SparseRowMatrix.java b/math/src/main/java/org/apache/mahout/math/SparseRowMatrix.java deleted file mode 100644 index 6e06769..0000000 --- a/math/src/main/java/org/apache/mahout/math/SparseRowMatrix.java +++ /dev/null @@ -1,236 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math; - -import org.apache.mahout.math.flavor.MatrixFlavor; -import org.apache.mahout.math.flavor.TraversingStructureEnum; -import org.apache.mahout.math.function.Functions; - -/** - * sparse matrix with general element values whose rows are accessible quickly. Implemented as a row - * array of either SequentialAccessSparseVectors or RandomAccessSparseVectors. - */ -public class SparseRowMatrix extends AbstractMatrix { - private Vector[] rowVectors; - - private final boolean randomAccessRows; - - /** - * Construct a sparse matrix starting with the provided row vectors. - * - * @param rows The number of rows in the result - * @param columns The number of columns in the result - * @param rowVectors a Vector[] array of rows - */ - public SparseRowMatrix(int rows, int columns, Vector[] rowVectors) { - this(rows, columns, rowVectors, false, rowVectors instanceof RandomAccessSparseVector[]); - } - - public SparseRowMatrix(int rows, int columns, boolean randomAccess) { - this(rows, columns, randomAccess - ? new RandomAccessSparseVector[rows] - : new SequentialAccessSparseVector[rows], - true, - randomAccess); - } - - public SparseRowMatrix(int rows, int columns, Vector[] vectors, boolean shallowCopy, boolean randomAccess) { - super(rows, columns); - this.randomAccessRows = randomAccess; - this.rowVectors = vectors.clone(); - for (int row = 0; row < rows; row++) { - if (vectors[row] == null) { - // TODO: this can't be right to change the argument - vectors[row] = randomAccess - ? new RandomAccessSparseVector(numCols(), 10) - : new SequentialAccessSparseVector(numCols(), 10); - } - this.rowVectors[row] = shallowCopy ? vectors[row] : vectors[row].clone(); - } - } - - /** - * Construct a matrix of the given cardinality, with rows defaulting to RandomAccessSparseVector - * implementation - * - * @param rows Number of rows in result - * @param columns Number of columns in result - */ - public SparseRowMatrix(int rows, int columns) { - this(rows, columns, true); - } - - @Override - public Matrix clone() { - SparseRowMatrix clone = (SparseRowMatrix) super.clone(); - clone.rowVectors = new Vector[rowVectors.length]; - for (int i = 0; i < rowVectors.length; i++) { - clone.rowVectors[i] = rowVectors[i].clone(); - } - return clone; - } - - @Override - public double getQuick(int row, int column) { - return rowVectors[row] == null ? 0.0 : rowVectors[row].getQuick(column); - } - - @Override - public Matrix like() { - return new SparseRowMatrix(rowSize(), columnSize(), randomAccessRows); - } - - @Override - public Matrix like(int rows, int columns) { - return new SparseRowMatrix(rows, columns, randomAccessRows); - } - - @Override - public void setQuick(int row, int column, double value) { - rowVectors[row].setQuick(column, value); - } - - @Override - public int[] getNumNondefaultElements() { - int[] result = new int[2]; - result[ROW] = rowVectors.length; - for (int row = 0; row < rowSize(); row++) { - result[COL] = Math.max(result[COL], rowVectors[row].getNumNondefaultElements()); - } - return result; - } - - @Override - public Matrix viewPart(int[] offset, int[] size) { - if (offset[ROW] < 0) { - throw new IndexException(offset[ROW], rowVectors.length); - } - if (offset[ROW] + size[ROW] > rowVectors.length) { - throw new IndexException(offset[ROW] + size[ROW], rowVectors.length); - } - if (offset[COL] < 0) { - throw new IndexException(offset[COL], rowVectors[ROW].size()); - } - if (offset[COL] + size[COL] > rowVectors[ROW].size()) { - throw new IndexException(offset[COL] + size[COL], rowVectors[ROW].size()); - } - return new MatrixView(this, offset, size); - } - - @Override - public Matrix assignColumn(int column, Vector other) { - if (rowSize() != other.size()) { - throw new CardinalityException(rowSize(), other.size()); - } - if (column < 0 || column >= columnSize()) { - throw new IndexException(column, columnSize()); - } - for (int row = 0; row < rowSize(); row++) { - rowVectors[row].setQuick(column, other.getQuick(row)); - } - return this; - } - - @Override - public Matrix assignRow(int row, Vector other) { - if (columnSize() != other.size()) { - throw new CardinalityException(columnSize(), other.size()); - } - if (row < 0 || row >= rowSize()) { - throw new IndexException(row, rowSize()); - } - rowVectors[row].assign(other); - return this; - } - - /** - * @param row an int row index - * @return a shallow view of the Vector at specified row (ie you may mutate the original matrix - * using this row) - */ - @Override - public Vector viewRow(int row) { - if (row < 0 || row >= rowSize()) { - throw new IndexException(row, rowSize()); - } - return rowVectors[row]; - } - - @Override - public Matrix transpose() { - SparseColumnMatrix scm = new SparseColumnMatrix(columns, rows); - for (int i = 0; i < rows; i++) { - Vector row = rowVectors[i]; - if (row.getNumNonZeroElements() > 0) { - scm.assignColumn(i, row); - } - } - return scm; - } - - @Override - public Matrix times(Matrix other) { - if (columnSize() != other.rowSize()) { - throw new CardinalityException(columnSize(), other.rowSize()); - } - - if (other instanceof SparseRowMatrix) { - SparseRowMatrix y = (SparseRowMatrix) other; - SparseRowMatrix result = (SparseRowMatrix) like(rowSize(), other.columnSize()); - - for (int i = 0; i < rows; i++) { - Vector row = rowVectors[i]; - for (Vector.Element element : row.nonZeroes()) { - result.rowVectors[i].assign(y.rowVectors[element.index()], Functions.plusMult(element.get())); - } - } - return result; - } else { - if (other.viewRow(0).isDense()) { - // result is dense, but can be computed relatively cheaply - Matrix result = other.like(rowSize(), other.columnSize()); - - for (int i = 0; i < rows; i++) { - Vector row = rowVectors[i]; - Vector r = new DenseVector(other.columnSize()); - for (Vector.Element element : row.nonZeroes()) { - r.assign(other.viewRow(element.index()), Functions.plusMult(element.get())); - } - result.viewRow(i).assign(r); - } - return result; - } else { - // other is sparse, but not something we understand intimately - SparseRowMatrix result = (SparseRowMatrix) like(rowSize(), other.columnSize()); - - for (int i = 0; i < rows; i++) { - Vector row = rowVectors[i]; - for (Vector.Element element : row.nonZeroes()) { - result.rowVectors[i].assign(other.viewRow(element.index()), Functions.plusMult(element.get())); - } - } - return result; - } - } - } - - @Override - public MatrixFlavor getFlavor() { - return MatrixFlavor.SPARSELIKE; - } -}
http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math/src/main/java/org/apache/mahout/math/Swapper.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/Swapper.java b/math/src/main/java/org/apache/mahout/math/Swapper.java deleted file mode 100644 index 1ca3744..0000000 --- a/math/src/main/java/org/apache/mahout/math/Swapper.java +++ /dev/null @@ -1,35 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -/* -Copyright 1999 CERN - European Organization for Nuclear Research. -Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose -is hereby granted without fee, provided that the above copyright notice appear in all copies and -that both that copyright notice and this permission notice appear in supporting documentation. -CERN makes no representations about the suitability of this software for any purpose. -It is provided "as is" without expressed or implied warranty. -*/ -package org.apache.mahout.math; - -/** - * Interface for an object that knows how to swap elements at two positions (a,b). - */ -public interface Swapper { - - /** Swaps the generic data g[a] with g[b]. */ - void swap(int a, int b); -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math/src/main/java/org/apache/mahout/math/TransposedMatrixView.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/TransposedMatrixView.java b/math/src/main/java/org/apache/mahout/math/TransposedMatrixView.java deleted file mode 100644 index ede6f35..0000000 --- a/math/src/main/java/org/apache/mahout/math/TransposedMatrixView.java +++ /dev/null @@ -1,147 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math; - -import org.apache.mahout.math.flavor.BackEnum; -import org.apache.mahout.math.flavor.MatrixFlavor; -import org.apache.mahout.math.flavor.TraversingStructureEnum; -import org.apache.mahout.math.function.DoubleDoubleFunction; -import org.apache.mahout.math.function.DoubleFunction; - -/** - * Matrix View backed by an {@link org.apache.mahout.math.function.IntIntFunction} - */ -public class TransposedMatrixView extends AbstractMatrix { - - private Matrix m; - - public TransposedMatrixView(Matrix m) { - super(m.numCols(), m.numRows()); - this.m = m; - } - - @Override - public Matrix assignColumn(int column, Vector other) { - m.assignRow(column,other); - return this; - } - - @Override - public Matrix assignRow(int row, Vector other) { - m.assignColumn(row,other); - return this; - } - - @Override - public double getQuick(int row, int column) { - return m.getQuick(column,row); - } - - @Override - public Matrix like() { - return m.like(rows, columns); - } - - @Override - public Matrix like(int rows, int columns) { - return m.like(rows,columns); - } - - @Override - public void setQuick(int row, int column, double value) { - m.setQuick(column, row, value); - } - - @Override - public Vector viewRow(int row) { - return m.viewColumn(row); - } - - @Override - public Vector viewColumn(int column) { - return m.viewRow(column); - } - - @Override - public Matrix assign(double value) { - return m.assign(value); - } - - @Override - public Matrix assign(Matrix other, DoubleDoubleFunction function) { - if (other instanceof TransposedMatrixView) { - m.assign(((TransposedMatrixView) other).m, function); - } else { - m.assign(new TransposedMatrixView(other), function); - } - return this; - } - - @Override - public Matrix assign(Matrix other) { - if (other instanceof TransposedMatrixView) { - return m.assign(((TransposedMatrixView) other).m); - } else { - return m.assign(new TransposedMatrixView(other)); - } - } - - @Override - public Matrix assign(DoubleFunction function) { - return m.assign(function); - } - - @Override - public MatrixFlavor getFlavor() { - return flavor; - } - - private MatrixFlavor flavor = new MatrixFlavor() { - @Override - public BackEnum getBacking() { - return m.getFlavor().getBacking(); - } - - @Override - public TraversingStructureEnum getStructure() { - TraversingStructureEnum flavor = m.getFlavor().getStructure(); - switch (flavor) { - case COLWISE: - return TraversingStructureEnum.ROWWISE; - case SPARSECOLWISE: - return TraversingStructureEnum.SPARSEROWWISE; - case ROWWISE: - return TraversingStructureEnum.COLWISE; - case SPARSEROWWISE: - return TraversingStructureEnum.SPARSECOLWISE; - default: - return flavor; - } - } - - @Override - public boolean isDense() { - return m.getFlavor().isDense(); - } - }; - - Matrix getDelegate() { - return m; - } - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math/src/main/java/org/apache/mahout/math/UpperTriangular.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/UpperTriangular.java b/math/src/main/java/org/apache/mahout/math/UpperTriangular.java deleted file mode 100644 index 29fa6a0..0000000 --- a/math/src/main/java/org/apache/mahout/math/UpperTriangular.java +++ /dev/null @@ -1,160 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math; - -import org.apache.mahout.math.flavor.BackEnum; -import org.apache.mahout.math.flavor.MatrixFlavor; -import org.apache.mahout.math.flavor.TraversingStructureEnum; - -/** - * - * Quick and dirty implementation of some {@link org.apache.mahout.math.Matrix} methods - * over packed upper triangular matrix. - * - */ -public class UpperTriangular extends AbstractMatrix { - - private static final double EPSILON = 1.0e-12; // assume anything less than - // that to be 0 during - // non-upper assignments - - private double[] values; - - /** - * represents n x n upper triangular matrix - * - * @param n - */ - - public UpperTriangular(int n) { - super(n, n); - values = new double[n * (n + 1) / 2]; - } - - public UpperTriangular(double[] data, boolean shallow) { - this(elementsToMatrixSize(data != null ? data.length : 0)); - if (data == null) { - throw new IllegalArgumentException("data"); - } - values = shallow ? data : data.clone(); - } - - public UpperTriangular(Vector data) { - this(elementsToMatrixSize(data.size())); - - for (Vector.Element el:data.nonZeroes()) { - values[el.index()] = el.get(); - } - } - - private static int elementsToMatrixSize(int dataSize) { - return (int) Math.round((-1 + Math.sqrt(1 + 8 * dataSize)) / 2); - } - - // copy-constructor - public UpperTriangular(UpperTriangular mx) { - this(mx.values, false); - } - - @Override - public Matrix assignColumn(int column, Vector other) { - if (columnSize() != other.size()) { - throw new IndexException(columnSize(), other.size()); - } - if (other.viewPart(column + 1, other.size() - column - 1).norm(1) > 1.0e-14) { - throw new IllegalArgumentException("Cannot set lower portion of triangular matrix to non-zero"); - } - for (Vector.Element element : other.viewPart(0, column).all()) { - setQuick(element.index(), column, element.get()); - } - return this; - } - - @Override - public Matrix assignRow(int row, Vector other) { - if (columnSize() != other.size()) { - throw new IndexException(numCols(), other.size()); - } - for (int i = 0; i < row; i++) { - if (Math.abs(other.getQuick(i)) > EPSILON) { - throw new IllegalArgumentException("non-triangular source"); - } - } - for (int i = row; i < rows; i++) { - setQuick(row, i, other.get(i)); - } - return this; - } - - public Matrix assignNonZeroElementsInRow(int row, double[] other) { - System.arraycopy(other, row, values, getL(row, row), rows - row); - return this; - } - - @Override - public double getQuick(int row, int column) { - if (row > column) { - return 0; - } - int i = getL(row, column); - return values[i]; - } - - private int getL(int row, int col) { - /* - * each row starts with some zero elements that we don't store. this - * accumulates an offset of (row+1)*row/2 - */ - return col + row * numCols() - (row + 1) * row / 2; - } - - @Override - public Matrix like() { - return like(rowSize(), columnSize()); - } - - @Override - public Matrix like(int rows, int columns) { - return new DenseMatrix(rows, columns); - } - - @Override - public void setQuick(int row, int column, double value) { - values[getL(row, column)] = value; - } - - @Override - public int[] getNumNondefaultElements() { - throw new UnsupportedOperationException(); - } - - @Override - public Matrix viewPart(int[] offset, int[] size) { - return new MatrixView(this, offset, size); - } - - public double[] getData() { - return values; - } - - @Override - public MatrixFlavor getFlavor() { - // We kind of consider ourselves a vector-backed but dense matrix for mmul, etc. purposes. - return new MatrixFlavor.FlavorImpl(BackEnum.JVMMEM, TraversingStructureEnum.VECTORBACKED, true); - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math/src/main/java/org/apache/mahout/math/Vector.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/Vector.java b/math/src/main/java/org/apache/mahout/math/Vector.java deleted file mode 100644 index c3b1dc9..0000000 --- a/math/src/main/java/org/apache/mahout/math/Vector.java +++ /dev/null @@ -1,434 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math; - - -import org.apache.mahout.math.function.DoubleDoubleFunction; -import org.apache.mahout.math.function.DoubleFunction; - -/** - * The basic interface including numerous convenience functions <p> NOTE: All implementing classes must have a - * constructor that takes an int for cardinality and a no-arg constructor that can be used for marshalling the Writable - * instance <p> NOTE: Implementations may choose to reuse the Vector.Element in the Iterable methods - */ -public interface Vector extends Cloneable { - - /** @return a formatted String suitable for output */ - String asFormatString(); - - /** - * Assign the value to all elements of the receiver - * - * @param value a double value - * @return the modified receiver - */ - Vector assign(double value); - - /** - * Assign the values to the receiver - * - * @param values a double[] of values - * @return the modified receiver - * @throws CardinalityException if the cardinalities differ - */ - Vector assign(double[] values); - - /** - * Assign the other vector values to the receiver - * - * @param other a Vector - * @return the modified receiver - * @throws CardinalityException if the cardinalities differ - */ - Vector assign(Vector other); - - /** - * Apply the function to each element of the receiver - * - * @param function a DoubleFunction to apply - * @return the modified receiver - */ - Vector assign(DoubleFunction function); - - /** - * Apply the function to each element of the receiver and the corresponding element of the other argument - * - * @param other a Vector containing the second arguments to the function - * @param function a DoubleDoubleFunction to apply - * @return the modified receiver - * @throws CardinalityException if the cardinalities differ - */ - Vector assign(Vector other, DoubleDoubleFunction function); - - /** - * Apply the function to each element of the receiver, using the y value as the second argument of the - * DoubleDoubleFunction - * - * @param f a DoubleDoubleFunction to be applied - * @param y a double value to be argument to the function - * @return the modified receiver - */ - Vector assign(DoubleDoubleFunction f, double y); - - /** - * Return the cardinality of the recipient (the maximum number of values) - * - * @return an int - */ - int size(); - - /** - * true if this implementation should be considered dense -- that it explicitly - * represents every value - * - * @return true or false - */ - boolean isDense(); - - /** - * true if this implementation should be considered to be iterable in index order in an efficient way. - * In particular this implies that {@link #all()} and {@link #nonZeroes()} ()} return elements - * in ascending order by index. - * - * @return true iff this implementation should be considered to be iterable in index order in an efficient way. - */ - boolean isSequentialAccess(); - - /** - * Return a copy of the recipient - * - * @return a new Vector - */ - @SuppressWarnings("CloneDoesntDeclareCloneNotSupportedException") - Vector clone(); - - Iterable<Element> all(); - - Iterable<Element> nonZeroes(); - - /** - * Return an object of Vector.Element representing an element of this Vector. Useful when designing new iterator - * types. - * - * @param index Index of the Vector.Element required - * @return The Vector.Element Object - */ - Element getElement(int index); - - /** - * Merge a set of (index, value) pairs into the vector. - * @param updates an ordered mapping of indices to values to be merged in. - */ - void mergeUpdates(OrderedIntDoubleMapping updates); - - /** - * A holder for information about a specific item in the Vector. <p> - * When using with an Iterator, the implementation - * may choose to reuse this element, so you may need to make a copy if you want to keep it - */ - interface Element { - - /** @return the value of this vector element. */ - double get(); - - /** @return the index of this vector element. */ - int index(); - - /** @param value Set the current element to value. */ - void set(double value); - } - - /** - * Return a new vector containing the values of the recipient divided by the argument - * - * @param x a double value - * @return a new Vector - */ - Vector divide(double x); - - /** - * Return the dot product of the recipient and the argument - * - * @param x a Vector - * @return a new Vector - * @throws CardinalityException if the cardinalities differ - */ - double dot(Vector x); - - /** - * Return the value at the given index - * - * @param index an int index - * @return the double at the index - * @throws IndexException if the index is out of bounds - */ - double get(int index); - - /** - * Return the value at the given index, without checking bounds - * - * @param index an int index - * @return the double at the index - */ - double getQuick(int index); - - /** - * Return an empty vector of the same underlying class as the receiver - * - * @return a Vector - */ - Vector like(); - - /** - * Return a new empty vector of the same underlying class as the receiver with given cardinality - * - * @param cardinality - size of vector - * @return {@link Vector} - */ - Vector like(int cardinality); - - /** - * Return a new vector containing the element by element difference of the recipient and the argument - * - * @param x a Vector - * @return a new Vector - * @throws CardinalityException if the cardinalities differ - */ - Vector minus(Vector x); - - /** - * Return a new vector containing the normalized (L_2 norm) values of the recipient - * - * @return a new Vector - */ - Vector normalize(); - - /** - * Return a new Vector containing the normalized (L_power norm) values of the recipient. <p> - * See - * http://en.wikipedia.org/wiki/Lp_space <p> - * Technically, when {@code 0 < power < 1}, we don't have a norm, just a metric, - * but we'll overload this here. <p> - * Also supports {@code power == 0} (number of non-zero elements) and power = {@link - * Double#POSITIVE_INFINITY} (max element). Again, see the Wikipedia page for more info - * - * @param power The power to use. Must be >= 0. May also be {@link Double#POSITIVE_INFINITY}. See the Wikipedia link - * for more on this. - * @return a new Vector x such that norm(x, power) == 1 - */ - Vector normalize(double power); - - /** - * Return a new vector containing the log(1 + entry)/ L_2 norm values of the recipient - * - * @return a new Vector - */ - Vector logNormalize(); - - /** - * Return a new Vector with a normalized value calculated as log_power(1 + entry)/ L_power norm. <p> - * - * @param power The power to use. Must be > 1. Cannot be {@link Double#POSITIVE_INFINITY}. - * @return a new Vector - */ - Vector logNormalize(double power); - - /** - * Return the k-norm of the vector. <p/> See http://en.wikipedia.org/wiki/Lp_space <p> - * Technically, when {@code 0 > power < 1}, we don't have a norm, just a metric, but we'll overload this here. Also supports power == 0 (number of - * non-zero elements) and power = {@link Double#POSITIVE_INFINITY} (max element). Again, see the Wikipedia page for - * more info. - * - * @param power The power to use. - * @see #normalize(double) - */ - double norm(double power); - - /** @return The minimum value in the Vector */ - double minValue(); - - /** @return The index of the minimum value */ - int minValueIndex(); - - /** @return The maximum value in the Vector */ - double maxValue(); - - /** @return The index of the maximum value */ - int maxValueIndex(); - - /** - * Return a new vector containing the sum of each value of the recipient and the argument - * - * @param x a double - * @return a new Vector - */ - Vector plus(double x); - - /** - * Return a new vector containing the element by element sum of the recipient and the argument - * - * @param x a Vector - * @return a new Vector - * @throws CardinalityException if the cardinalities differ - */ - Vector plus(Vector x); - - /** - * Set the value at the given index - * - * @param index an int index into the receiver - * @param value a double value to set - * @throws IndexException if the index is out of bounds - */ - void set(int index, double value); - - /** - * Set the value at the given index, without checking bounds - * - * @param index an int index into the receiver - * @param value a double value to set - */ - void setQuick(int index, double value); - - /** - * Increment the value at the given index by the given value. - * - * @param index an int index into the receiver - * @param increment sets the value at the given index to value + increment; - */ - void incrementQuick(int index, double increment); - - /** - * Return the number of values in the recipient which are not the default value. For instance, for a - * sparse vector, this would be the number of non-zero values. - * - * @return an int - */ - int getNumNondefaultElements(); - - /** - * Return the number of non zero elements in the vector. - * - * @return an int - */ - int getNumNonZeroElements(); - - /** - * Return a new vector containing the product of each value of the recipient and the argument - * - * @param x a double argument - * @return a new Vector - */ - Vector times(double x); - - /** - * Return a new vector containing the element-wise product of the recipient and the argument - * - * @param x a Vector argument - * @return a new Vector - * @throws CardinalityException if the cardinalities differ - */ - Vector times(Vector x); - - /** - * Return a new vector containing the subset of the recipient - * - * @param offset an int offset into the receiver - * @param length the cardinality of the desired result - * @return a new Vector - * @throws CardinalityException if the length is greater than the cardinality of the receiver - * @throws IndexException if the offset is negative or the offset+length is outside of the receiver - */ - Vector viewPart(int offset, int length); - - /** - * Return the sum of all the elements of the receiver - * - * @return a double - */ - double zSum(); - - /** - * Return the cross product of the receiver and the other vector - * - * @param other another Vector - * @return a Matrix - */ - Matrix cross(Vector other); - - /* - * Need stories for these but keeping them here for now. - */ - // void getNonZeros(IntArrayList jx, DoubleArrayList values); - // void foreachNonZero(IntDoubleFunction f); - // DoubleDoubleFunction map); - // NewVector assign(Vector y, DoubleDoubleFunction function, IntArrayList - // nonZeroIndexes); - - /** - * Examples speak louder than words: aggregate(plus, pow(2)) is another way to say - * getLengthSquared(), aggregate(max, abs) is norm(Double.POSITIVE_INFINITY). To sum all of the positive values, - * aggregate(plus, max(0)). - * @param aggregator used to combine the current value of the aggregation with the result of map.apply(nextValue) - * @param map a function to apply to each element of the vector in turn before passing to the aggregator - * @return the final aggregation - */ - double aggregate(DoubleDoubleFunction aggregator, DoubleFunction map); - - /** - * <p>Generalized inner product - take two vectors, iterate over them both, using the combiner to combine together - * (and possibly map in some way) each pair of values, which are then aggregated with the previous accumulated - * value in the combiner.</p> - * <p> - * Example: dot(other) could be expressed as aggregate(other, Plus, Times), and kernelized inner products (which - * are symmetric on the indices) work similarly. - * @param other a vector to aggregate in combination with - * @param aggregator function we're aggregating with; fa - * @param combiner function we're combining with; fc - * @return the final aggregation; {@code if r0 = fc(this[0], other[0]), ri = fa(r_{i-1}, fc(this[i], other[i])) - * for all i > 0} - */ - double aggregate(Vector other, DoubleDoubleFunction aggregator, DoubleDoubleFunction combiner); - - /** - * Return the sum of squares of all elements in the vector. Square root of - * this value is the length of the vector. - */ - double getLengthSquared(); - - /** - * Get the square of the distance between this vector and the other vector. - */ - double getDistanceSquared(Vector v); - - /** - * Gets an estimate of the cost (in number of operations) it takes to lookup a random element in this vector. - */ - double getLookupCost(); - - /** - * Gets an estimate of the cost (in number of operations) it takes to advance an iterator through the nonzero - * elements of this vector. - */ - double getIteratorAdvanceCost(); - - /** - * Return true iff adding a new (nonzero) element takes constant time for this vector. - */ - boolean isAddConstantTime(); -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java b/math/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java deleted file mode 100644 index 4d3a80f..0000000 --- a/math/src/main/java/org/apache/mahout/math/VectorBinaryAggregate.java +++ /dev/null @@ -1,481 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math; - -import org.apache.mahout.math.function.DoubleDoubleFunction; -import org.apache.mahout.math.set.OpenIntHashSet; - -import java.util.Iterator; - -/** - * Abstract class encapsulating different algorithms that perform the Vector operations aggregate(). - * x.aggregte(y, fa, fc), for x and y Vectors and fa, fc DoubleDouble functions: - * - applies the function fc to every element in x and y, fc(xi, yi) - * - constructs a result iteratively, r0 = fc(x0, y0), ri = fc(r_{i-1}, fc(xi, yi)). - * This works essentially like a map/reduce functional combo. - * - * The names of variables, methods and classes used here follow the following conventions: - * The vector being assigned to (the left hand side) is called this or x. - * The right hand side is called that or y. - * The aggregating (reducing) function to be applied is called fa. - * The combining (mapping) function to be applied is called fc. - * - * The different algorithms take into account the different characteristics of vector classes: - * - whether the vectors support sequential iteration (isSequential()) - * - what the lookup cost is (getLookupCost()) - * - what the iterator advancement cost is (getIteratorAdvanceCost()) - * - * The names of the actual classes (they're nested in VectorBinaryAssign) describe the used for assignment. - * The most important optimization is iterating just through the nonzeros (only possible if f(0, 0) = 0). - * There are 4 main possibilities: - * - iterating through the nonzeros of just one vector and looking up the corresponding elements in the other - * - iterating through the intersection of nonzeros (those indices where both vectors have nonzero values) - * - iterating through the union of nonzeros (those indices where at least one of the vectors has a nonzero value) - * - iterating through all the elements in some way (either through both at the same time, both one after the other, - * looking up both, looking up just one). - * - * The internal details are not important and a particular algorithm should generally not be called explicitly. - * The best one will be selected through assignBest(), which is itself called through Vector.assign(). - * - * See https://docs.google.com/document/d/1g1PjUuvjyh2LBdq2_rKLIcUiDbeOORA1sCJiSsz-JVU/edit# for a more detailed - * explanation. - */ -public abstract class VectorBinaryAggregate { - public static final VectorBinaryAggregate[] OPERATIONS = { - new AggregateNonzerosIterateThisLookupThat(), - new AggregateNonzerosIterateThatLookupThis(), - - new AggregateIterateIntersection(), - - new AggregateIterateUnionSequential(), - new AggregateIterateUnionRandom(), - - new AggregateAllIterateSequential(), - new AggregateAllIterateThisLookupThat(), - new AggregateAllIterateThatLookupThis(), - new AggregateAllLoop(), - }; - - /** - * Returns true iff we can use this algorithm to apply fc to x and y component-wise and aggregate the result using fa. - */ - public abstract boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc); - - /** - * Estimates the cost of using this algorithm to compute the aggregation. The algorithm is assumed to be valid. - */ - public abstract double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc); - - /** - * Main method that applies fc to x and y component-wise aggregating the results with fa. It returns the result of - * the aggregation. - */ - public abstract double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc); - - /** - * The best operation is the least expensive valid one. - */ - public static VectorBinaryAggregate getBestOperation(Vector x, Vector y, DoubleDoubleFunction fa, - DoubleDoubleFunction fc) { - int bestOperationIndex = -1; - double bestCost = Double.POSITIVE_INFINITY; - for (int i = 0; i < OPERATIONS.length; ++i) { - if (OPERATIONS[i].isValid(x, y, fa, fc)) { - double cost = OPERATIONS[i].estimateCost(x, y, fa, fc); - if (cost < bestCost) { - bestCost = cost; - bestOperationIndex = i; - } - } - } - return OPERATIONS[bestOperationIndex]; - } - - /** - * This is the method that should be used when aggregating. It selects the best algorithm and applies it. - */ - public static double aggregateBest(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return getBestOperation(x, y, fa, fc).aggregate(x, y, fa, fc); - } - - public static class AggregateNonzerosIterateThisLookupThat extends VectorBinaryAggregate { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return fa.isLikeRightPlus() && (fa.isAssociativeAndCommutative() || x.isSequentialAccess()) - && fc.isLikeLeftMult(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost(); - } - - @Override - public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - Iterator<Vector.Element> xi = x.nonZeroes().iterator(); - if (!xi.hasNext()) { - return 0; - } - Vector.Element xe = xi.next(); - double result = fc.apply(xe.get(), y.getQuick(xe.index())); - while (xi.hasNext()) { - xe = xi.next(); - result = fa.apply(result, fc.apply(xe.get(), y.getQuick(xe.index()))); - } - return result; - } - } - - public static class AggregateNonzerosIterateThatLookupThis extends VectorBinaryAggregate { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return fa.isLikeRightPlus() && (fa.isAssociativeAndCommutative() || y.isSequentialAccess()) - && fc.isLikeRightMult(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost() * x.getLookupCost(); - } - - @Override - public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - Iterator<Vector.Element> yi = y.nonZeroes().iterator(); - if (!yi.hasNext()) { - return 0; - } - Vector.Element ye = yi.next(); - double result = fc.apply(x.getQuick(ye.index()), ye.get()); - while (yi.hasNext()) { - ye = yi.next(); - result = fa.apply(result, fc.apply(x.getQuick(ye.index()), ye.get())); - } - return result; - } - } - - public static class AggregateIterateIntersection extends VectorBinaryAggregate { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return fa.isLikeRightPlus() && fc.isLikeMult() && x.isSequentialAccess() && y.isSequentialAccess(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return Math.min(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(), - y.getNumNondefaultElements() * y.getIteratorAdvanceCost()); - } - - @Override - public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - Iterator<Vector.Element> xi = x.nonZeroes().iterator(); - Iterator<Vector.Element> yi = y.nonZeroes().iterator(); - Vector.Element xe = null; - Vector.Element ye = null; - boolean advanceThis = true; - boolean advanceThat = true; - boolean validResult = false; - double result = 0; - while (true) { - if (advanceThis) { - if (xi.hasNext()) { - xe = xi.next(); - } else { - break; - } - } - if (advanceThat) { - if (yi.hasNext()) { - ye = yi.next(); - } else { - break; - } - } - if (xe.index() == ye.index()) { - double thisResult = fc.apply(xe.get(), ye.get()); - if (validResult) { - result = fa.apply(result, thisResult); - } else { - result = thisResult; - validResult = true; - } - advanceThis = true; - advanceThat = true; - } else { - if (xe.index() < ye.index()) { // f(x, 0) = 0 - advanceThis = true; - advanceThat = false; - } else { // f(0, y) = 0 - advanceThis = false; - advanceThat = true; - } - } - } - return result; - } - } - - public static class AggregateIterateUnionSequential extends VectorBinaryAggregate { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return fa.isLikeRightPlus() && !fc.isDensifying() - && x.isSequentialAccess() && y.isSequentialAccess(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(), - y.getNumNondefaultElements() * y.getIteratorAdvanceCost()); - } - - @Override - public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - Iterator<Vector.Element> xi = x.nonZeroes().iterator(); - Iterator<Vector.Element> yi = y.nonZeroes().iterator(); - Vector.Element xe = null; - Vector.Element ye = null; - boolean advanceThis = true; - boolean advanceThat = true; - boolean validResult = false; - double result = 0; - while (true) { - if (advanceThis) { - if (xi.hasNext()) { - xe = xi.next(); - } else { - xe = null; - } - } - if (advanceThat) { - if (yi.hasNext()) { - ye = yi.next(); - } else { - ye = null; - } - } - double thisResult; - if (xe != null && ye != null) { // both vectors have nonzero elements - if (xe.index() == ye.index()) { - thisResult = fc.apply(xe.get(), ye.get()); - advanceThis = true; - advanceThat = true; - } else { - if (xe.index() < ye.index()) { // f(x, 0) - thisResult = fc.apply(xe.get(), 0); - advanceThis = true; - advanceThat = false; - } else { - thisResult = fc.apply(0, ye.get()); - advanceThis = false; - advanceThat = true; - } - } - } else if (xe != null) { // just the first one still has nonzeros - thisResult = fc.apply(xe.get(), 0); - advanceThis = true; - advanceThat = false; - } else if (ye != null) { // just the second one has nonzeros - thisResult = fc.apply(0, ye.get()); - advanceThis = false; - advanceThat = true; - } else { // we're done, both are empty - break; - } - if (validResult) { - result = fa.apply(result, thisResult); - } else { - result = thisResult; - validResult = true; - } - } - return result; - } - } - - public static class AggregateIterateUnionRandom extends VectorBinaryAggregate { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return fa.isLikeRightPlus() && !fc.isDensifying() - && (fa.isAssociativeAndCommutative() || (x.isSequentialAccess() && y.isSequentialAccess())); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost(), - y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost()); - } - - @Override - public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - OpenIntHashSet visited = new OpenIntHashSet(); - Iterator<Vector.Element> xi = x.nonZeroes().iterator(); - boolean validResult = false; - double result = 0; - double thisResult; - while (xi.hasNext()) { - Vector.Element xe = xi.next(); - thisResult = fc.apply(xe.get(), y.getQuick(xe.index())); - if (validResult) { - result = fa.apply(result, thisResult); - } else { - result = thisResult; - validResult = true; - } - visited.add(xe.index()); - } - Iterator<Vector.Element> yi = y.nonZeroes().iterator(); - while (yi.hasNext()) { - Vector.Element ye = yi.next(); - if (!visited.contains(ye.index())) { - thisResult = fc.apply(x.getQuick(ye.index()), ye.get()); - if (validResult) { - result = fa.apply(result, thisResult); - } else { - result = thisResult; - validResult = true; - } - } - } - return result; - } - } - - public static class AggregateAllIterateSequential extends VectorBinaryAggregate { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return x.isSequentialAccess() && y.isSequentialAccess() && !x.isDense() && !y.isDense(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * y.getIteratorAdvanceCost()); - } - - @Override - public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - Iterator<Vector.Element> xi = x.all().iterator(); - Iterator<Vector.Element> yi = y.all().iterator(); - boolean validResult = false; - double result = 0; - while (xi.hasNext() && yi.hasNext()) { - Vector.Element xe = xi.next(); - double thisResult = fc.apply(xe.get(), yi.next().get()); - if (validResult) { - result = fa.apply(result, thisResult); - } else { - result = thisResult; - validResult = true; - } - } - return result; - } - } - - public static class AggregateAllIterateThisLookupThat extends VectorBinaryAggregate { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return (fa.isAssociativeAndCommutative() || x.isSequentialAccess()) - && !x.isDense(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return x.size() * x.getIteratorAdvanceCost() * y.getLookupCost(); - } - - @Override - public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - Iterator<Vector.Element> xi = x.all().iterator(); - boolean validResult = false; - double result = 0; - while (xi.hasNext()) { - Vector.Element xe = xi.next(); - double thisResult = fc.apply(xe.get(), y.getQuick(xe.index())); - if (validResult) { - result = fa.apply(result, thisResult); - } else { - result = thisResult; - validResult = true; - } - } - return result; - } - } - - public static class AggregateAllIterateThatLookupThis extends VectorBinaryAggregate { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return (fa.isAssociativeAndCommutative() || y.isSequentialAccess()) - && !y.isDense(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return y.size() * y.getIteratorAdvanceCost() * x.getLookupCost(); - } - - @Override - public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - Iterator<Vector.Element> yi = y.all().iterator(); - boolean validResult = false; - double result = 0; - while (yi.hasNext()) { - Vector.Element ye = yi.next(); - double thisResult = fc.apply(x.getQuick(ye.index()), ye.get()); - if (validResult) { - result = fa.apply(result, thisResult); - } else { - result = thisResult; - validResult = true; - } - } - return result; - } - } - - public static class AggregateAllLoop extends VectorBinaryAggregate { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return true; - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - return x.size() * x.getLookupCost() * y.getLookupCost(); - } - - @Override - public double aggregate(Vector x, Vector y, DoubleDoubleFunction fa, DoubleDoubleFunction fc) { - double result = fc.apply(x.getQuick(0), y.getQuick(0)); - int s = x.size(); - for (int i = 1; i < s; ++i) { - result = fa.apply(result, fc.apply(x.getQuick(i), y.getQuick(i))); - } - return result; - } - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java b/math/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java deleted file mode 100644 index f24d552..0000000 --- a/math/src/main/java/org/apache/mahout/math/VectorBinaryAssign.java +++ /dev/null @@ -1,667 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math; - -import org.apache.mahout.math.Vector.Element; -import org.apache.mahout.math.function.DoubleDoubleFunction; -import org.apache.mahout.math.set.OpenIntHashSet; - -import java.util.Iterator; - -/** - * Abstract class encapsulating different algorithms that perform the Vector operations assign(). - * x.assign(y, f), for x and y Vectors and f a DoubleDouble function: - * - applies the function f to every element in x and y, f(xi, yi) - * - assigns xi = f(xi, yi) for all indices i - * - * The names of variables, methods and classes used here follow the following conventions: - * The vector being assigned to (the left hand side) is called this or x. - * The right hand side is called that or y. - * The function to be applied is called f. - * - * The different algorithms take into account the different characteristics of vector classes: - * - whether the vectors support sequential iteration (isSequential()) - * - whether the vectors support constant-time additions (isAddConstantTime()) - * - what the lookup cost is (getLookupCost()) - * - what the iterator advancement cost is (getIteratorAdvanceCost()) - * - * The names of the actual classes (they're nested in VectorBinaryAssign) describe the used for assignment. - * The most important optimization is iterating just through the nonzeros (only possible if f(0, 0) = 0). - * There are 4 main possibilities: - * - iterating through the nonzeros of just one vector and looking up the corresponding elements in the other - * - iterating through the intersection of nonzeros (those indices where both vectors have nonzero values) - * - iterating through the union of nonzeros (those indices where at least one of the vectors has a nonzero value) - * - iterating through all the elements in some way (either through both at the same time, both one after the other, - * looking up both, looking up just one). - * Then, there are two additional sub-possibilities: - * - if a new value can be added to x in constant time (isAddConstantTime()), the *Inplace updates are used - * - otherwise (really just for SequentialAccessSparseVectors right now), the *Merge updates are used, where - * a sorted list of (index, value) pairs is merged into the vector at the end. - * - * The internal details are not important and a particular algorithm should generally not be called explicitly. - * The best one will be selected through assignBest(), which is itself called through Vector.assign(). - * - * See https://docs.google.com/document/d/1g1PjUuvjyh2LBdq2_rKLIcUiDbeOORA1sCJiSsz-JVU/edit# for a more detailed - * explanation. - */ -public abstract class VectorBinaryAssign { - public static final VectorBinaryAssign[] OPERATIONS = { - new AssignNonzerosIterateThisLookupThat(), - new AssignNonzerosIterateThatLookupThisMergeUpdates(), - new AssignNonzerosIterateThatLookupThisInplaceUpdates(), - - new AssignIterateIntersection(), - - new AssignIterateUnionSequentialMergeUpdates(), - new AssignIterateUnionSequentialInplaceUpdates(), - new AssignIterateUnionRandomMergeUpdates(), - new AssignIterateUnionRandomInplaceUpdates(), - - new AssignAllIterateSequentialMergeUpdates(), - new AssignAllIterateSequentialInplaceUpdates(), - new AssignAllIterateThisLookupThatMergeUpdates(), - new AssignAllIterateThisLookupThatInplaceUpdates(), - new AssignAllIterateThatLookupThisMergeUpdates(), - new AssignAllIterateThatLookupThisInplaceUpdates(), - new AssignAllLoopMergeUpdates(), - new AssignAllLoopInplaceUpdates(), - }; - - /** - * Returns true iff we can use this algorithm to apply f to x and y component-wise and assign the result to x. - */ - public abstract boolean isValid(Vector x, Vector y, DoubleDoubleFunction f); - - /** - * Estimates the cost of using this algorithm to compute the assignment. The algorithm is assumed to be valid. - */ - public abstract double estimateCost(Vector x, Vector y, DoubleDoubleFunction f); - - /** - * Main method that applies f to x and y component-wise assigning the results to x. It returns the modified vector, - * x. - */ - public abstract Vector assign(Vector x, Vector y, DoubleDoubleFunction f); - - /** - * The best operation is the least expensive valid one. - */ - public static VectorBinaryAssign getBestOperation(Vector x, Vector y, DoubleDoubleFunction f) { - int bestOperationIndex = -1; - double bestCost = Double.POSITIVE_INFINITY; - for (int i = 0; i < OPERATIONS.length; ++i) { - if (OPERATIONS[i].isValid(x, y, f)) { - double cost = OPERATIONS[i].estimateCost(x, y, f); - if (cost < bestCost) { - bestCost = cost; - bestOperationIndex = i; - } - } - } - return OPERATIONS[bestOperationIndex]; - } - - /** - * This is the method that should be used when assigning. It selects the best algorithm and applies it. - * Note that it does NOT invalidate the cached length of the Vector and should only be used through the wrapprs - * in AbstractVector. - */ - public static Vector assignBest(Vector x, Vector y, DoubleDoubleFunction f) { - return getBestOperation(x, y, f).assign(x, y, f); - } - - /** - * If f(0, y) = 0, the zeros in x don't matter and we can simply iterate through the nonzeros of x. - * To get the corresponding element of y, we perform a lookup. - * There are no *Merge or *Inplace versions because in this case x cannot become more dense because of f, meaning - * all changes will occur at indices whose values are already nonzero. - */ - public static class AssignNonzerosIterateThisLookupThat extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return f.isLikeLeftMult(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost(); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - for (Element xe : x.nonZeroes()) { - xe.set(f.apply(xe.get(), y.getQuick(xe.index()))); - } - return x; - } - } - - /** - * If f(x, 0) = x, the zeros in y don't matter and we can simply iterate through the nonzeros of y. - * We get the corresponding element of x through a lookup and update x inplace. - */ - public static class AssignNonzerosIterateThatLookupThisInplaceUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return f.isLikeRightPlus(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost() * x.getLookupCost(); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - for (Element ye : y.nonZeroes()) { - x.setQuick(ye.index(), f.apply(x.getQuick(ye.index()), ye.get())); - } - return x; - } - } - - /** - * If f(x, 0) = x, the zeros in y don't matter and we can simply iterate through the nonzeros of y. - * We get the corresponding element of x through a lookup and update x by merging. - */ - public static class AssignNonzerosIterateThatLookupThisMergeUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return f.isLikeRightPlus() && y.isSequentialAccess() && !x.isAddConstantTime(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * y.getLookupCost(); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false); - for (Element ye : y.nonZeroes()) { - updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get())); - } - x.mergeUpdates(updates); - return x; - } - } - - /** - * If f(x, 0) = x and f(0, y) = 0 the zeros in x and y don't matter and we can iterate through the nonzeros - * in both x and y. - * This is only possible if both x and y support sequential access. - */ - public static class AssignIterateIntersection extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return f.isLikeLeftMult() && f.isLikeRightPlus() && x.isSequentialAccess() && y.isSequentialAccess(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return Math.min(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(), - y.getNumNondefaultElements() * y.getIteratorAdvanceCost()); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - Iterator<Vector.Element> xi = x.nonZeroes().iterator(); - Iterator<Vector.Element> yi = y.nonZeroes().iterator(); - Vector.Element xe = null; - Vector.Element ye = null; - boolean advanceThis = true; - boolean advanceThat = true; - while (true) { - if (advanceThis) { - if (xi.hasNext()) { - xe = xi.next(); - } else { - break; - } - } - if (advanceThat) { - if (yi.hasNext()) { - ye = yi.next(); - } else { - break; - } - } - if (xe.index() == ye.index()) { - xe.set(f.apply(xe.get(), ye.get())); - advanceThis = true; - advanceThat = true; - } else { - if (xe.index() < ye.index()) { // f(x, 0) = 0 - advanceThis = true; - advanceThat = false; - } else { // f(0, y) = 0 - advanceThis = false; - advanceThat = true; - } - } - } - return x; - } - } - - /** - * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y. - * In this case we iterate through them in parallel and update x by merging. Because we're iterating through - * both vectors at the same time, x and y need to support sequential access. - */ - public static class AssignIterateUnionSequentialMergeUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return !f.isDensifying() && x.isSequentialAccess() && y.isSequentialAccess() && !x.isAddConstantTime(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(), - y.getNumNondefaultElements() * y.getIteratorAdvanceCost()); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - Iterator<Vector.Element> xi = x.nonZeroes().iterator(); - Iterator<Vector.Element> yi = y.nonZeroes().iterator(); - Vector.Element xe = null; - Vector.Element ye = null; - boolean advanceThis = true; - boolean advanceThat = true; - OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false); - while (true) { - if (advanceThis) { - if (xi.hasNext()) { - xe = xi.next(); - } else { - xe = null; - } - } - if (advanceThat) { - if (yi.hasNext()) { - ye = yi.next(); - } else { - ye = null; - } - } - if (xe != null && ye != null) { // both vectors have nonzero elements - if (xe.index() == ye.index()) { - xe.set(f.apply(xe.get(), ye.get())); - advanceThis = true; - advanceThat = true; - } else { - if (xe.index() < ye.index()) { // f(x, 0) - xe.set(f.apply(xe.get(), 0)); - advanceThis = true; - advanceThat = false; - } else { - updates.set(ye.index(), f.apply(0, ye.get())); - advanceThis = false; - advanceThat = true; - } - } - } else if (xe != null) { // just the first one still has nonzeros - xe.set(f.apply(xe.get(), 0)); - advanceThis = true; - advanceThat = false; - } else if (ye != null) { // just the second one has nonzeros - updates.set(ye.index(), f.apply(0, ye.get())); - advanceThis = false; - advanceThat = true; - } else { // we're done, both are empty - break; - } - } - x.mergeUpdates(updates); - return x; - } - } - - /** - * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y. - * In this case we iterate through them in parallel and update x inplace. Because we're iterating through - * both vectors at the same time, x and y need to support sequential access. - */ - public static class AssignIterateUnionSequentialInplaceUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return !f.isDensifying() && x.isSequentialAccess() && y.isSequentialAccess() && x.isAddConstantTime(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost(), - y.getNumNondefaultElements() * y.getIteratorAdvanceCost()); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - Iterator<Vector.Element> xi = x.nonZeroes().iterator(); - Iterator<Vector.Element> yi = y.nonZeroes().iterator(); - Vector.Element xe = null; - Vector.Element ye = null; - boolean advanceThis = true; - boolean advanceThat = true; - while (true) { - if (advanceThis) { - if (xi.hasNext()) { - xe = xi.next(); - } else { - xe = null; - } - } - if (advanceThat) { - if (yi.hasNext()) { - ye = yi.next(); - } else { - ye = null; - } - } - if (xe != null && ye != null) { // both vectors have nonzero elements - if (xe.index() == ye.index()) { - xe.set(f.apply(xe.get(), ye.get())); - advanceThis = true; - advanceThat = true; - } else { - if (xe.index() < ye.index()) { // f(x, 0) - xe.set(f.apply(xe.get(), 0)); - advanceThis = true; - advanceThat = false; - } else { - x.setQuick(ye.index(), f.apply(0, ye.get())); - advanceThis = false; - advanceThat = true; - } - } - } else if (xe != null) { // just the first one still has nonzeros - xe.set(f.apply(xe.get(), 0)); - advanceThis = true; - advanceThat = false; - } else if (ye != null) { // just the second one has nonzeros - x.setQuick(ye.index(), f.apply(0, ye.get())); - advanceThis = false; - advanceThat = true; - } else { // we're done, both are empty - break; - } - } - return x; - } - } - - /** - * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y. - * In this case, we iterate through the nozeros of x and y alternatively (this works even when one of them - * doesn't support sequential access). Since we're merging the results into x, when iterating through y, the - * order of iteration matters and y must support sequential access. - */ - public static class AssignIterateUnionRandomMergeUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return !f.isDensifying() && !x.isAddConstantTime() && y.isSequentialAccess(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost(), - y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost()); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - OpenIntHashSet visited = new OpenIntHashSet(); - for (Element xe : x.nonZeroes()) { - xe.set(f.apply(xe.get(), y.getQuick(xe.index()))); - visited.add(xe.index()); - } - OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false); - for (Element ye : y.nonZeroes()) { - if (!visited.contains(ye.index())) { - updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get())); - } - } - x.mergeUpdates(updates); - return x; - } - } - - /** - * If f(0, 0) = 0 we can iterate through the nonzeros in either x or y. - * In this case, we iterate through the nozeros of x and y alternatively (this works even when one of them - * doesn't support sequential access). Because updates to x are inplace, neither x, nor y need to support - * sequential access. - */ - public static class AssignIterateUnionRandomInplaceUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return !f.isDensifying() && x.isAddConstantTime(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return Math.max(x.getNumNondefaultElements() * x.getIteratorAdvanceCost() * y.getLookupCost(), - y.getNumNondefaultElements() * y.getIteratorAdvanceCost() * x.getLookupCost()); - } - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - OpenIntHashSet visited = new OpenIntHashSet(); - for (Element xe : x.nonZeroes()) { - xe.set(f.apply(xe.get(), y.getQuick(xe.index()))); - visited.add(xe.index()); - } - for (Element ye : y.nonZeroes()) { - if (!visited.contains(ye.index())) { - x.setQuick(ye.index(), f.apply(x.getQuick(ye.index()), ye.get())); - } - } - return x; - } - } - - public static class AssignAllIterateSequentialMergeUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return x.isSequentialAccess() && y.isSequentialAccess() && !x.isAddConstantTime() && !x.isDense() && !y.isDense(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * y.getIteratorAdvanceCost()); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - Iterator<Vector.Element> xi = x.all().iterator(); - Iterator<Vector.Element> yi = y.all().iterator(); - OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false); - while (xi.hasNext() && yi.hasNext()) { - Element xe = xi.next(); - updates.set(xe.index(), f.apply(xe.get(), yi.next().get())); - } - x.mergeUpdates(updates); - return x; - } - } - - public static class AssignAllIterateSequentialInplaceUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return x.isSequentialAccess() && y.isSequentialAccess() && x.isAddConstantTime() - && !x.isDense() && !y.isDense(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return Math.max(x.size() * x.getIteratorAdvanceCost(), y.size() * y.getIteratorAdvanceCost()); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - Iterator<Vector.Element> xi = x.all().iterator(); - Iterator<Vector.Element> yi = y.all().iterator(); - while (xi.hasNext() && yi.hasNext()) { - Element xe = xi.next(); - x.setQuick(xe.index(), f.apply(xe.get(), yi.next().get())); - } - return x; - } - } - - public static class AssignAllIterateThisLookupThatMergeUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return !x.isAddConstantTime() && !x.isDense(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return x.size() * x.getIteratorAdvanceCost() * y.getLookupCost(); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false); - for (Element xe : x.all()) { - updates.set(xe.index(), f.apply(xe.get(), y.getQuick(xe.index()))); - } - x.mergeUpdates(updates); - return x; - } - } - - public static class AssignAllIterateThisLookupThatInplaceUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return x.isAddConstantTime() && !x.isDense(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return x.size() * x.getIteratorAdvanceCost() * y.getLookupCost(); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - for (Element xe : x.all()) { - x.setQuick(xe.index(), f.apply(xe.get(), y.getQuick(xe.index()))); - } - return x; - } - } - - public static class AssignAllIterateThatLookupThisMergeUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return !x.isAddConstantTime() && !y.isDense(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return y.size() * y.getIteratorAdvanceCost() * x.getLookupCost(); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false); - for (Element ye : y.all()) { - updates.set(ye.index(), f.apply(x.getQuick(ye.index()), ye.get())); - } - x.mergeUpdates(updates); - return x; - } - } - - public static class AssignAllIterateThatLookupThisInplaceUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return x.isAddConstantTime() && !y.isDense(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return y.size() * y.getIteratorAdvanceCost() * x.getLookupCost(); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - for (Element ye : y.all()) { - x.setQuick(ye.index(), f.apply(x.getQuick(ye.index()), ye.get())); - } - return x; - } - } - - public static class AssignAllLoopMergeUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return !x.isAddConstantTime(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return x.size() * x.getLookupCost() * y.getLookupCost(); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - OrderedIntDoubleMapping updates = new OrderedIntDoubleMapping(false); - for (int i = 0; i < x.size(); ++i) { - updates.set(i, f.apply(x.getQuick(i), y.getQuick(i))); - } - x.mergeUpdates(updates); - return x; - } - } - - public static class AssignAllLoopInplaceUpdates extends VectorBinaryAssign { - - @Override - public boolean isValid(Vector x, Vector y, DoubleDoubleFunction f) { - return x.isAddConstantTime(); - } - - @Override - public double estimateCost(Vector x, Vector y, DoubleDoubleFunction f) { - return x.size() * x.getLookupCost() * y.getLookupCost(); - } - - @Override - public Vector assign(Vector x, Vector y, DoubleDoubleFunction f) { - for (int i = 0; i < x.size(); ++i) { - x.setQuick(i, f.apply(x.getQuick(i), y.getQuick(i))); - } - return x; - } - } -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math/src/main/java/org/apache/mahout/math/VectorIterable.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/VectorIterable.java b/math/src/main/java/org/apache/mahout/math/VectorIterable.java deleted file mode 100644 index 8414fdb..0000000 --- a/math/src/main/java/org/apache/mahout/math/VectorIterable.java +++ /dev/null @@ -1,56 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math; - -import java.util.Iterator; - -public interface VectorIterable extends Iterable<MatrixSlice> { - - /* Iterate all rows in order */ - Iterator<MatrixSlice> iterateAll(); - - /* Iterate all non empty rows in arbitrary order */ - Iterator<MatrixSlice> iterateNonEmpty(); - - int numSlices(); - - int numRows(); - - int numCols(); - - /** - * Return a new vector with cardinality equal to getNumRows() of this matrix which is the matrix product of the - * recipient and the argument - * - * @param v a vector with cardinality equal to getNumCols() of the recipient - * @return a new vector (typically a DenseVector) - * @throws CardinalityException if this.getNumRows() != v.size() - */ - Vector times(Vector v); - - /** - * Convenience method for producing this.transpose().times(this.times(v)), which can be implemented with only one pass - * over the matrix, without making the transpose() call (which can be expensive if the matrix is sparse) - * - * @param v a vector with cardinality equal to getNumCols() of the recipient - * @return a new vector (typically a DenseVector) with cardinality equal to that of the argument. - * @throws CardinalityException if this.getNumCols() != v.size() - */ - Vector timesSquared(Vector v); - -} http://git-wip-us.apache.org/repos/asf/mahout/blob/99a5358f/math/src/main/java/org/apache/mahout/math/VectorView.java ---------------------------------------------------------------------- diff --git a/math/src/main/java/org/apache/mahout/math/VectorView.java b/math/src/main/java/org/apache/mahout/math/VectorView.java deleted file mode 100644 index 62c5490..0000000 --- a/math/src/main/java/org/apache/mahout/math/VectorView.java +++ /dev/null @@ -1,238 +0,0 @@ -/** - * Licensed to the Apache Software Foundation (ASF) under one or more - * contributor license agreements. See the NOTICE file distributed with - * this work for additional information regarding copyright ownership. - * The ASF licenses this file to You under the Apache License, Version 2.0 - * (the "License"); you may not use this file except in compliance with - * the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, software - * distributed under the License is distributed on an "AS IS" BASIS, - * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. - * See the License for the specific language governing permissions and - * limitations under the License. - */ - -package org.apache.mahout.math; - -import java.util.Iterator; - -import com.google.common.collect.AbstractIterator; - -/** Implements subset view of a Vector */ -public class VectorView extends AbstractVector { - - protected Vector vector; - - // the offset into the Vector - protected int offset; - - /** For serialization purposes only */ - public VectorView() { - super(0); - } - - public VectorView(Vector vector, int offset, int cardinality) { - super(cardinality); - this.vector = vector; - this.offset = offset; - } - - @Override - protected Matrix matrixLike(int rows, int columns) { - return ((AbstractVector) vector).matrixLike(rows, columns); - } - - @Override - public Vector clone() { - VectorView r = (VectorView) super.clone(); - r.vector = vector.clone(); - r.offset = offset; - return r; - } - - @Override - public boolean isDense() { - return vector.isDense(); - } - - @Override - public boolean isSequentialAccess() { - return vector.isSequentialAccess(); - } - - @Override - public VectorView like() { - return new VectorView(vector.like(), offset, size()); - } - - @Override - public Vector like(int cardinality) { - return vector.like(cardinality); - } - - @Override - public double getQuick(int index) { - return vector.getQuick(offset + index); - } - - @Override - public void setQuick(int index, double value) { - vector.setQuick(offset + index, value); - } - - @Override - public int getNumNondefaultElements() { - return size(); - } - - @Override - public Vector viewPart(int offset, int length) { - if (offset < 0) { - throw new IndexException(offset, size()); - } - if (offset + length > size()) { - throw new IndexException(offset + length, size()); - } - return new VectorView(vector, offset + this.offset, length); - } - - /** @return true if index is a valid index in the underlying Vector */ - private boolean isInView(int index) { - return index >= offset && index < offset + size(); - } - - @Override - public Iterator<Element> iterateNonZero() { - return new NonZeroIterator(); - } - - @Override - public Iterator<Element> iterator() { - return new AllIterator(); - } - - public final class NonZeroIterator extends AbstractIterator<Element> { - - private final Iterator<Element> it; - - private NonZeroIterator() { - it = vector.nonZeroes().iterator(); - } - - @Override - protected Element computeNext() { - while (it.hasNext()) { - Element el = it.next(); - if (isInView(el.index()) && el.get() != 0) { - Element decorated = el; /* vector.getElement(el.index()); */ - return new DecoratorElement(decorated); - } - } - return endOfData(); - } - - } - - public final class AllIterator extends AbstractIterator<Element> { - - private final Iterator<Element> it; - - private AllIterator() { - it = vector.all().iterator(); - } - - @Override - protected Element computeNext() { - while (it.hasNext()) { - Element el = it.next(); - if (isInView(el.index())) { - Element decorated = vector.getElement(el.index()); - return new DecoratorElement(decorated); - } - } - return endOfData(); // No element was found - } - - } - - private final class DecoratorElement implements Element { - - private final Element decorated; - - private DecoratorElement(Element decorated) { - this.decorated = decorated; - } - - @Override - public double get() { - return decorated.get(); - } - - @Override - public int index() { - return decorated.index() - offset; - } - - @Override - public void set(double value) { - decorated.set(value); - } - } - - @Override - public double getLengthSquared() { - double result = 0.0; - int size = size(); - for (int i = 0; i < size; i++) { - double value = getQuick(i); - result += value * value; - } - return result; - } - - @Override - public double getDistanceSquared(Vector v) { - double result = 0.0; - int size = size(); - for (int i = 0; i < size; i++) { - double delta = getQuick(i) - v.getQuick(i); - result += delta * delta; - } - return result; - } - - @Override - public double getLookupCost() { - return vector.getLookupCost(); - } - - @Override - public double getIteratorAdvanceCost() { - // TODO: remove the 2x after fixing the Element iterator - return 2 * vector.getIteratorAdvanceCost(); - } - - @Override - public boolean isAddConstantTime() { - return vector.isAddConstantTime(); - } - - /** - * Used internally by assign() to update multiple indices and values at once. - * Only really useful for sparse vectors (especially SequentialAccessSparseVector). - * <p> - * If someone ever adds a new type of sparse vectors, this method must merge (index, value) pairs into the vector. - * - * @param updates a mapping of indices to values to merge in the vector. - */ - @Override - public void mergeUpdates(OrderedIntDoubleMapping updates) { - for (int i = 0; i < updates.getNumMappings(); ++i) { - updates.setIndexAt(i, updates.indexAt(i) + offset); - } - vector.mergeUpdates(updates); - } -}
