http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/DiagonalMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/DiagonalMatrix.java b/core/src/main/java/org/apache/mahout/math/DiagonalMatrix.java new file mode 100644 index 0000000..070fad2 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/DiagonalMatrix.java @@ -0,0 +1,378 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import org.apache.mahout.math.flavor.MatrixFlavor; +import org.apache.mahout.math.flavor.TraversingStructureEnum; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +public class DiagonalMatrix extends AbstractMatrix implements MatrixTimesOps { + private final Vector diagonal; + + public DiagonalMatrix(Vector values) { + super(values.size(), values.size()); + this.diagonal = values; + } + + public DiagonalMatrix(Matrix values) { + this(values.viewDiagonal()); + } + + public DiagonalMatrix(double value, int size) { + this(new ConstantVector(value, size)); + } + + public DiagonalMatrix(double[] values) { + super(values.length, values.length); + this.diagonal = new DenseVector(values); + } + + public static DiagonalMatrix identity(int size) { + return new DiagonalMatrix(1, size); + } + + @Override + public Matrix assignColumn(int column, Vector other) { + throw new UnsupportedOperationException("Can't assign a column to a diagonal matrix"); + } + + /** + * Assign the other vector values to the row of the receiver + * + * @param row the int row to assign + * @param other a Vector + * @return the modified receiver + * @throws CardinalityException if the cardinalities differ + */ + @Override + public Matrix assignRow(int row, Vector other) { + throw new UnsupportedOperationException("Can't assign a row to a diagonal matrix"); + } + + @Override + public Vector viewRow(int row) { + return new SingleElementVector(row); + } + + @Override + public Vector viewColumn(int row) { + return new SingleElementVector(row); + } + + /** + * Special class to implement views of rows and columns of a diagonal matrix. + */ + public class SingleElementVector extends AbstractVector { + private int index; + + public SingleElementVector(int index) { + super(diagonal.size()); + this.index = index; + } + + @Override + public double getQuick(int index) { + if (index == this.index) { + return diagonal.get(index); + } else { + return 0; + } + } + + @Override + public void set(int index, double value) { + if (index == this.index) { + diagonal.set(index, value); + } else { + throw new IllegalArgumentException("Can't set off-diagonal element of diagonal matrix"); + } + } + + @Override + protected Iterator<Element> iterateNonZero() { + return new Iterator<Element>() { + boolean more = true; + + @Override + public boolean hasNext() { + return more; + } + + @Override + public Element next() { + if (more) { + more = false; + return new Element() { + @Override + public double get() { + return diagonal.get(index); + } + + @Override + public int index() { + return index; + } + + @Override + public void set(double value) { + diagonal.set(index, value); + } + }; + } else { + throw new NoSuchElementException("Only one non-zero element in a row or column of a diagonal matrix"); + } + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Can't remove from vector view"); + } + }; + } + + @Override + protected Iterator<Element> iterator() { + return new Iterator<Element>() { + int i = 0; + + Element r = new Element() { + @Override + public double get() { + if (i == index) { + return diagonal.get(index); + } else { + return 0; + } + } + + @Override + public int index() { + return i; + } + + @Override + public void set(double value) { + if (i == index) { + diagonal.set(index, value); + } else { + throw new IllegalArgumentException("Can't set any element but diagonal"); + } + } + }; + + @Override + public boolean hasNext() { + return i < diagonal.size() - 1; + } + + @Override + public Element next() { + if (i < SingleElementVector.this.size() - 1) { + i++; + return r; + } else { + throw new NoSuchElementException("Attempted to access passed last element of vector"); + } + } + + + @Override + public void remove() { + throw new UnsupportedOperationException("Default operation"); + } + }; + } + + @Override + protected Matrix matrixLike(int rows, int columns) { + return new DiagonalMatrix(rows, columns); + } + + @Override + public boolean isDense() { + return false; + } + + @Override + public boolean isSequentialAccess() { + return true; + } + + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + throw new UnsupportedOperationException("Default operation"); + } + + @Override + public Vector like() { + return new DenseVector(size()); + } + + @Override + public Vector like(int cardinality) { + return new DenseVector(cardinality); + } + + @Override + public void setQuick(int index, double value) { + if (index == this.index) { + diagonal.set(this.index, value); + } else { + throw new IllegalArgumentException("Can't set off-diagonal element of DiagonalMatrix"); + } + } + + @Override + public int getNumNondefaultElements() { + return 1; + } + + @Override + public double getLookupCost() { + return 0; + } + + @Override + public double getIteratorAdvanceCost() { + return 1; + } + + @Override + public boolean isAddConstantTime() { + return false; + } + } + + /** + * Provides a view of the diagonal of a matrix. + */ + @Override + public Vector viewDiagonal() { + return this.diagonal; + } + + /** + * Return the value at the given location, without checking bounds + * + * @param row an int row index + * @param column an int column index + * @return the double at the index + */ + @Override + public double getQuick(int row, int column) { + if (row == column) { + return diagonal.get(row); + } else { + return 0; + } + } + + /** + * Return an empty matrix of the same underlying class as the receiver + * + * @return a Matrix + */ + @Override + public Matrix like() { + return new SparseRowMatrix(rowSize(), columnSize()); + } + + /** + * Returns an empty matrix of the same underlying class as the receiver and of the specified + * size. + * + * @param rows the int number of rows + * @param columns the int number of columns + */ + @Override + public Matrix like(int rows, int columns) { + return new SparseRowMatrix(rows, columns); + } + + @Override + public void setQuick(int row, int column, double value) { + if (row == column) { + diagonal.set(row, value); + } else { + throw new UnsupportedOperationException("Can't set off-diagonal element"); + } + } + + /** + * Return the number of values in the recipient + * + * @return an int[2] containing [row, column] count + */ + @Override + public int[] getNumNondefaultElements() { + throw new UnsupportedOperationException("Don't understand how to implement this"); + } + + /** + * Return a new matrix containing the subset of the recipient + * + * @param offset an int[2] offset into the receiver + * @param size the int[2] size of the desired result + * @return a new Matrix that is a view of the original + * @throws CardinalityException if the length is greater than the cardinality of the receiver + * @throws IndexException if the offset is negative or the offset+length is outside of the + * receiver + */ + @Override + public Matrix viewPart(int[] offset, int[] size) { + return new MatrixView(this, offset, size); + } + + @Override + public Matrix times(Matrix other) { + return timesRight(other); + } + + @Override + public Matrix timesRight(Matrix that) { + if (that.numRows() != diagonal.size()) { + throw new IllegalArgumentException("Incompatible number of rows in the right operand of matrix multiplication."); + } + Matrix m = that.like(); + for (int row = 0; row < diagonal.size(); row++) { + m.assignRow(row, that.viewRow(row).times(diagonal.getQuick(row))); + } + return m; + } + + @Override + public Matrix timesLeft(Matrix that) { + if (that.numCols() != diagonal.size()) { + throw new IllegalArgumentException( + "Incompatible number of rows in the left operand of matrix-matrix multiplication."); + } + Matrix m = that.like(); + for (int col = 0; col < diagonal.size(); col++) { + m.assignColumn(col, that.viewColumn(col).times(diagonal.getQuick(col))); + } + return m; + } + + @Override + public MatrixFlavor getFlavor() { + return MatrixFlavor.DIAGONALLIKE; + } + +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/FileBasedMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/FileBasedMatrix.java b/core/src/main/java/org/apache/mahout/math/FileBasedMatrix.java new file mode 100644 index 0000000..3a19318 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/FileBasedMatrix.java @@ -0,0 +1,185 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import com.google.common.base.Preconditions; +import com.google.common.collect.Lists; + +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.DoubleBuffer; +import java.nio.MappedByteBuffer; +import java.nio.channels.FileChannel; +import java.util.List; + +/** + * Provides a way to get data from a file and treat it as if it were a matrix, but avoids putting all that + * data onto the Java heap. Instead, the file is mapped into non-heap memory as a DoubleBuffer and we access + * that instead. + */ +public final class FileBasedMatrix extends AbstractMatrix { + private final int rowsPerBlock; + private final List<DoubleBuffer> content = Lists.newArrayList(); + + /** + * Constructs an empty matrix of the given size. + * + * @param rows The number of rows in the result. + * @param columns The number of columns in the result. + */ + public FileBasedMatrix(int rows, int columns) { + super(rows, columns); + long maxRows = ((1L << 31) - 1) / (columns * 8); + if (rows > maxRows) { + rowsPerBlock = (int) maxRows; + } else { + rowsPerBlock = rows; + } + } + + private void addData(DoubleBuffer content) { + this.content.add(content); + } + + public void setData(File f, boolean loadNow) throws IOException { + Preconditions.checkArgument(f.length() == rows * columns * 8L, "File " + f + " is wrong length"); + + for (int i = 0; i < (rows + rowsPerBlock - 1) / rowsPerBlock; i++) { + long start = i * rowsPerBlock * columns * 8L; + long size = rowsPerBlock * columns * 8L; + MappedByteBuffer buf = new FileInputStream(f).getChannel().map(FileChannel.MapMode.READ_ONLY, start, + Math.min(f.length() - start, size)); + if (loadNow) { + buf.load(); + } + addData(buf.asDoubleBuffer()); + } + } + + public static void writeMatrix(File f, Matrix m) throws IOException { + Preconditions.checkArgument(f.canWrite(), "Can't write to output file"); + FileOutputStream fos = new FileOutputStream(f); + try { + ByteBuffer buf = ByteBuffer.allocate(m.columnSize() * 8); + for (MatrixSlice row : m) { + buf.clear(); + for (Vector.Element element : row.vector().all()) { + buf.putDouble(element.get()); + } + buf.flip(); + fos.write(buf.array()); + } + } finally { + fos.close(); + } + } + + /** + * Assign the other vector values to the column of the receiver + * + * @param column the int row to assign + * @param other a Vector + * @return the modified receiver + * @throws org.apache.mahout.math.CardinalityException + * if the cardinalities differ + */ + @Override + public Matrix assignColumn(int column, Vector other) { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Assign the other vector values to the row of the receiver + * + * @param row the int row to assign + * @param other a Vector + * @return the modified receiver + * @throws org.apache.mahout.math.CardinalityException + * if the cardinalities differ + */ + @Override + public Matrix assignRow(int row, Vector other) { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Return the value at the given indexes, without checking bounds + * + * @param row an int row index + * @param column an int column index + * @return the double at the index + */ + @Override + public double getQuick(int row, int column) { + int block = row / rowsPerBlock; + return content.get(block).get((row % rowsPerBlock) * columns + column); + } + + /** + * Return an empty matrix of the same underlying class as the receiver + * + * @return a Matrix + */ + @Override + public Matrix like() { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Returns an empty matrix of the same underlying class as the receiver and of the specified size. + * + * @param rows the int number of rows + * @param columns the int number of columns + */ + @Override + public Matrix like(int rows, int columns) { + return new DenseMatrix(rows, columns); + } + + /** + * Set the value at the given index, without checking bounds + * + * @param row an int row index into the receiver + * @param column an int column index into the receiver + * @param value a double value to set + */ + @Override + public void setQuick(int row, int column, double value) { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Return a view into part of a matrix. Changes to the view will change the + * original matrix. + * + * @param offset an int[2] offset into the receiver + * @param size the int[2] size of the desired result + * @return a matrix that shares storage with part of the original matrix. + * @throws org.apache.mahout.math.CardinalityException + * if the length is greater than the cardinality of the receiver + * @throws org.apache.mahout.math.IndexException + * if the offset is negative or the offset+length is outside of the receiver + */ + @Override + public Matrix viewPart(int[] offset, int[] size) { + throw new UnsupportedOperationException("Default operation"); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/FileBasedSparseBinaryMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/FileBasedSparseBinaryMatrix.java b/core/src/main/java/org/apache/mahout/math/FileBasedSparseBinaryMatrix.java new file mode 100644 index 0000000..0b0c25e --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/FileBasedSparseBinaryMatrix.java @@ -0,0 +1,535 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.io.DataOutputStream; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileOutputStream; +import java.io.IOException; +import java.nio.ByteBuffer; +import java.nio.IntBuffer; +import java.nio.channels.FileChannel; +import java.util.Collections; +import java.util.Iterator; +import java.util.List; + +import com.google.common.base.Function; +import com.google.common.base.Preconditions; +import com.google.common.collect.AbstractIterator; +import com.google.common.collect.Iterables; +import com.google.common.collect.Lists; + +/** + * Provides a way to get data from a file and treat it as if it were a matrix, but avoids putting + * all that data onto the Java heap. Instead, the file is mapped into non-heap memory as a + * DoubleBuffer and we access that instead. The interesting aspect of this is that the values in + * the matrix are binary and sparse so we don't need to store the actual data, just the location of + * non-zero values. + * <p> + * Currently file data is formatted as follows: + * <p> + * <ul> <li>A magic number to indicate the file format.</li> <li>The size of the matrix (max rows + * and columns possible)</li> <li>Number of non-zeros in each row.</li> <li>A list of non-zero + * columns for each row. The list starts with a count and then has column numbers</li> </ul> + * <p> + * It would be preferable to use something like protobufs to define the format so that we can use + * different row formats for different kinds of data. For instance, Golay coding of column numbers + * or compressed bit vectors might be good representations for some purposes. + */ +public final class FileBasedSparseBinaryMatrix extends AbstractMatrix { + private static final int MAGIC_NUMBER_V0 = 0x12d7067d; + + private final List<IntBuffer> data = Lists.newArrayList(); + private int[] bufferIndex; + private int[] rowOffset; + private int[] rowSize; + + /** + * Constructs an empty matrix of the given size. + * + * @param rows The number of rows in the result. + * @param columns The number of columns in the result. + */ + public FileBasedSparseBinaryMatrix(int rows, int columns) { + super(rows, columns); + } + + public void setData(File f) throws IOException { + List<ByteBuffer> buffers = Lists.newArrayList(); + FileChannel input = new FileInputStream(f).getChannel(); + + buffers.add(input.map(FileChannel.MapMode.READ_ONLY, 0, Math.min(Integer.MAX_VALUE, f.length()))); + data.add(buffers.get(0).asIntBuffer()); + Preconditions.checkArgument(buffers.get(0).getInt() == MAGIC_NUMBER_V0, "Wrong type of file"); + + int rows = buffers.get(0).getInt(); + int cols = buffers.get(0).getInt(); + Preconditions.checkArgument(rows == rowSize()); + Preconditions.checkArgument(cols == columnSize()); + + rowOffset = new int[rows]; + rowSize = new int[rows]; + bufferIndex = new int[rows]; + + int offset = 12 + 4 * rows; + for (int i = 0; i < rows; i++) { + int size = buffers.get(0).getInt(); + int buffer = 0; + while (buffer < buffers.size()) { + if (offset + size * 4 <= buffers.get(buffer).limit()) { + break; + } else { + offset -= buffers.get(buffer).capacity(); + } + } + if (buffer == buffers.size()) { + buffers.add(input.map(FileChannel.MapMode.READ_ONLY, 0, Math.min(Integer.MAX_VALUE, f.length() - offset))); + data.add(buffers.get(buffer).asIntBuffer()); + } + rowOffset[i] = offset / 4; + rowSize[i] = size; + bufferIndex[i] = buffer; + +// final SparseBinaryVector v = new SparseBinaryVector(buffers.get(buffer), columns, offset, size); +// this.rows.add(v); + offset += size * 4; + } + } + + public static void writeMatrix(File f, Matrix m) throws IOException { + Preconditions.checkArgument(f.canWrite(), "Can't write to output file"); + FileOutputStream fos = new FileOutputStream(f); + + // write header + DataOutputStream out = new DataOutputStream(fos); + out.writeInt(MAGIC_NUMBER_V0); + out.writeInt(m.rowSize()); + out.writeInt(m.columnSize()); + + // compute offsets and write row headers + for (MatrixSlice row : m) { + int nondefaultElements = row.vector().getNumNondefaultElements(); + out.writeInt(nondefaultElements); + } + + // write rows + for (MatrixSlice row : m) { + List<Integer> columns = Lists.newArrayList(Iterables.transform(row.vector().nonZeroes(), + new Function<Vector.Element, Integer>() { + @Override + public Integer apply(Vector.Element element) { + return element.index(); + } + })); + Collections.sort(columns); + + for (Integer column : columns) { + out.writeInt(column); + } + } + + out.close(); + fos.close(); + } + + /** + * Assign the other vector values to the column of the receiver + * + * @param column the int row to assign + * @param other a Vector + * @return the modified receiver + * @throws org.apache.mahout.math.CardinalityException + * if the cardinalities differ + */ + @Override + public Matrix assignColumn(int column, Vector other) { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Assign the other vector values to the row of the receiver + * + * @param row the int row to assign + * @param other a Vector + * @return the modified receiver + * @throws org.apache.mahout.math.CardinalityException + * if the cardinalities differ + */ + @Override + public Matrix assignRow(int row, Vector other) { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Return the value at the given indexes, without checking bounds + * + * @param rowIndex an int row index + * @param columnIndex an int column index + * @return the double at the index + */ + @Override + public double getQuick(int rowIndex, int columnIndex) { + IntBuffer tmp = data.get(bufferIndex[rowIndex]).asReadOnlyBuffer(); + tmp.position(rowOffset[rowIndex]); + tmp.limit(rowSize[rowIndex]); + tmp = tmp.slice(); + return searchForIndex(tmp, columnIndex); + } + + private static double searchForIndex(IntBuffer row, int columnIndex) { + int high = row.limit(); + if (high == 0) { + return 0; + } + int low = 0; + while (high > low) { + int mid = (low + high) / 2; + if (row.get(mid) < columnIndex) { + low = mid + 1; + } else { + high = mid; + } + } + if (low >= row.limit()) { + return 0; + } else if (high == low && row.get(low) == columnIndex) { + return 1; + } else { + return 0; + } + } + + /** + * Return an empty matrix of the same underlying class as the receiver + * + * @return a Matrix + */ + @Override + public Matrix like() { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Returns an empty matrix of the same underlying class as the receiver and of the specified + * size. + * + * @param rows the int number of rows + * @param columns the int number of columns + */ + @Override + public Matrix like(int rows, int columns) { + return new DenseMatrix(rows, columns); + } + + /** + * Set the value at the given index, without checking bounds + * + * @param row an int row index into the receiver + * @param column an int column index into the receiver + * @param value a double value to set + */ + @Override + public void setQuick(int row, int column, double value) { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Return a view into part of a matrix. Changes to the view will change the original matrix. + * + * @param offset an int[2] offset into the receiver + * @param size the int[2] size of the desired result + * @return a matrix that shares storage with part of the original matrix. + * @throws org.apache.mahout.math.CardinalityException + * if the length is greater than the cardinality of the receiver + * @throws org.apache.mahout.math.IndexException + * if the offset is negative or the offset+length is outside of the receiver + */ + @Override + public Matrix viewPart(int[] offset, int[] size) { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Returns a view of a row. Changes to the view will affect the original. + * + * @param rowIndex Which row to return. + * @return A vector that references the desired row. + */ + @Override + public Vector viewRow(int rowIndex) { + IntBuffer tmp = data.get(bufferIndex[rowIndex]).asReadOnlyBuffer(); + tmp.position(rowOffset[rowIndex]); + tmp.limit(rowOffset[rowIndex] + rowSize[rowIndex]); + tmp = tmp.slice(); + return new SparseBinaryVector(tmp, columnSize()); + } + + private static class SparseBinaryVector extends AbstractVector { + private final IntBuffer buffer; + private final int maxIndex; + + private SparseBinaryVector(IntBuffer buffer, int maxIndex) { + super(maxIndex); + this.buffer = buffer; + this.maxIndex = maxIndex; + } + + SparseBinaryVector(ByteBuffer row, int maxIndex, int offset, int size) { + super(maxIndex); + row = row.asReadOnlyBuffer(); + row.position(offset); + row.limit(offset + size * 4); + row = row.slice(); + this.buffer = row.slice().asIntBuffer(); + this.maxIndex = maxIndex; + } + + /** + * Subclasses must override to return an appropriately sparse or dense result + * + * @param rows the row cardinality + * @param columns the column cardinality + * @return a Matrix + */ + @Override + protected Matrix matrixLike(int rows, int columns) { + throw new UnsupportedOperationException("Default operation"); + } + + /** + * Used internally by assign() to update multiple indices and values at once. + * Only really useful for sparse vectors (especially SequentialAccessSparseVector). + * <p/> + * If someone ever adds a new type of sparse vectors, this method must merge (index, value) pairs into the vector. + * + * @param updates a mapping of indices to values to merge in the vector. + */ + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + throw new UnsupportedOperationException("Cannot mutate SparseBinaryVector"); + } + + /** + * @return true iff this implementation should be considered dense -- that it explicitly represents + * every value + */ + @Override + public boolean isDense() { + return false; + } + + /** + * @return true iff this implementation should be considered to be iterable in index order in an + * efficient way. In particular this implies that {@link #iterator()} and {@link + * #iterateNonZero()} return elements in ascending order by index. + */ + @Override + public boolean isSequentialAccess() { + return true; + } + + /** + * Iterates over all elements + * + * NOTE: Implementations may choose to reuse the Element returned + * for performance reasons, so if you need a copy of it, you should call {@link #getElement(int)} + * for the given index + * + * @return An {@link java.util.Iterator} over all elements + */ + @Override + public Iterator<Element> iterator() { + return new AbstractIterator<Element>() { + int i = 0; + + @Override + protected Element computeNext() { + if (i < maxIndex) { + return new Element() { + int index = i++; + /** + * @return the value of this vector element. + */ + @Override + public double get() { + return getQuick(index); + } + + /** + * @return the index of this vector element. + */ + @Override + public int index() { + return index; + } + + /** + * @param value Set the current element to value. + */ + @Override + public void set(double value) { + throw new UnsupportedOperationException("Default operation"); + } + }; + } else { + return endOfData(); + } + } + }; + } + + /** + * Iterates over all non-zero elements. <p/> NOTE: Implementations may choose to reuse the Element + * returned for performance reasons, so if you need a copy of it, you should call {@link + * #getElement(int)} for the given index + * + * @return An {@link java.util.Iterator} over all non-zero elements + */ + @Override + public Iterator<Element> iterateNonZero() { + return new AbstractIterator<Element>() { + int i = 0; + @Override + protected Element computeNext() { + if (i < buffer.limit()) { + return new BinaryReadOnlyElement(buffer.get(i++)); + } else { + return endOfData(); + } + } + }; + } + + /** + * Return the value at the given index, without checking bounds + * + * @param index an int index + * @return the double at the index + */ + @Override + public double getQuick(int index) { + return searchForIndex(buffer, index); + } + + /** + * Return an empty vector of the same underlying class as the receiver + * + * @return a Vector + */ + @Override + public Vector like() { + return new RandomAccessSparseVector(size()); + } + + @Override + public Vector like(int cardinality) { + return new RandomAccessSparseVector(cardinality); + } + + /** + * Copy the vector for fast operations. + * + * @return a Vector + */ + @Override + protected Vector createOptimizedCopy() { + return new RandomAccessSparseVector(size()).assign(this); + } + + /** + * Set the value at the given index, without checking bounds + * + * @param index an int index into the receiver + * @param value a double value to set + */ + @Override + public void setQuick(int index, double value) { + throw new UnsupportedOperationException("Read-only view"); + } + + /** + * Set the value at the given index, without checking bounds + * + * @param index an int index into the receiver + * @param increment a double value to set + */ + @Override + public void incrementQuick(int index, double increment) { + throw new UnsupportedOperationException("Read-only view"); + } + + /** + * Return the number of values in the recipient which are not the default value. For instance, for + * a sparse vector, this would be the number of non-zero values. + * + * @return an int + */ + @Override + public int getNumNondefaultElements() { + return buffer.limit(); + } + + @Override + public double getLookupCost() { + return 1; + } + + @Override + public double getIteratorAdvanceCost() { + return 1; + } + + @Override + public boolean isAddConstantTime() { + throw new UnsupportedOperationException("Can't add binary value"); + } + } + + public static class BinaryReadOnlyElement implements Vector.Element { + private final int index; + + public BinaryReadOnlyElement(int index) { + this.index = index; + } + + /** + * @return the value of this vector element. + */ + @Override + public double get() { + return 1; + } + + /** + * @return the index of this vector element. + */ + @Override + public int index() { + return index; + } + + /** + * @param value Set the current element to value. + */ + @Override + public void set(double value) { + throw new UnsupportedOperationException("Can't set binary value"); + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/FunctionalMatrixView.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/FunctionalMatrixView.java b/core/src/main/java/org/apache/mahout/math/FunctionalMatrixView.java new file mode 100644 index 0000000..9028e23 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/FunctionalMatrixView.java @@ -0,0 +1,99 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import org.apache.mahout.math.flavor.BackEnum; +import org.apache.mahout.math.flavor.MatrixFlavor; +import org.apache.mahout.math.flavor.TraversingStructureEnum; +import org.apache.mahout.math.function.IntIntFunction; + +/** + * Matrix View backed by an {@link IntIntFunction} + */ +class FunctionalMatrixView extends AbstractMatrix { + + /** + * view generator function + */ + private IntIntFunction gf; + private boolean denseLike; + private MatrixFlavor flavor; + + public FunctionalMatrixView(int rows, int columns, IntIntFunction gf) { + this(rows, columns, gf, false); + } + + /** + * @param gf generator function + * @param denseLike whether like() should create Dense or Sparse matrix. + */ + public FunctionalMatrixView(int rows, int columns, IntIntFunction gf, boolean denseLike) { + super(rows, columns); + this.gf = gf; + this.denseLike = denseLike; + flavor = new MatrixFlavor.FlavorImpl(BackEnum.JVMMEM, TraversingStructureEnum.BLOCKIFIED, denseLike); + } + + @Override + public Matrix assignColumn(int column, Vector other) { + throw new UnsupportedOperationException("Assignment to a matrix not supported"); + } + + @Override + public Matrix assignRow(int row, Vector other) { + throw new UnsupportedOperationException("Assignment to a matrix view not supported"); + } + + @Override + public double getQuick(int row, int column) { + return gf.apply(row, column); + } + + @Override + public Matrix like() { + return like(rows, columns); + } + + @Override + public Matrix like(int rows, int columns) { + if (denseLike) + return new DenseMatrix(rows, columns); + else + return new SparseMatrix(rows, columns); + } + + @Override + public void setQuick(int row, int column, double value) { + throw new UnsupportedOperationException("Assignment to a matrix view not supported"); + } + + @Override + public Vector viewRow(int row) { + return new MatrixVectorView(this, row, 0, 0, 1, denseLike); + } + + @Override + public Vector viewColumn(int column) { + return new MatrixVectorView(this, 0, column, 1, 0, denseLike); + } + + @Override + public MatrixFlavor getFlavor() { + return flavor; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/IndexException.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/IndexException.java b/core/src/main/java/org/apache/mahout/math/IndexException.java new file mode 100644 index 0000000..489d536 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/IndexException.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +/** + * Exception thrown when a matrix or vector is accessed at an index, or dimension, + * which does not logically exist in the entity. + */ +public class IndexException extends IllegalArgumentException { + + public IndexException(int index, int cardinality) { + super("Index " + index + " is outside allowable range of [0," + cardinality + ')'); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/LengthCachingVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/LengthCachingVector.java b/core/src/main/java/org/apache/mahout/math/LengthCachingVector.java new file mode 100644 index 0000000..770ccc4 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/LengthCachingVector.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +/** + * Marker interface for vectors that may cache their squared length. + */ +interface LengthCachingVector { + /** + * Gets the currently cached squared length or if there is none, recalculates + * the value and returns that. + * @return The sum of the squares of all elements in the vector. + */ + double getLengthSquared(); + + /** + * Invalidates the length cache. This should be called by all mutators of the vector. + */ + void invalidateCachedLength(); +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/Matrices.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/Matrices.java b/core/src/main/java/org/apache/mahout/math/Matrices.java new file mode 100644 index 0000000..5d8b5c5 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/Matrices.java @@ -0,0 +1,167 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import com.google.common.base.Preconditions; +import org.apache.mahout.common.RandomUtils; +import org.apache.mahout.math.function.DoubleFunction; +import org.apache.mahout.math.function.Functions; +import org.apache.mahout.math.function.IntIntFunction; + +import java.util.Random; + +public final class Matrices { + + /** + * Create a matrix view based on a function generator. + * <p> + * The generator needs to be idempotent, i.e. returning same value + * for each combination of (row, column) argument sent to generator's + * {@link IntIntFunction#apply(int, int)} call. + * + * @param rows Number of rows in a view + * @param columns Number of columns in a view. + * @param gf view generator + * @param denseLike type of matrix returne dby {@link org.apache.mahout.math.Matrix#like()}. + * @return new matrix view. + */ + public static Matrix functionalMatrixView(final int rows, + final int columns, + final IntIntFunction gf, + final boolean denseLike) { + return new FunctionalMatrixView(rows, columns, gf, denseLike); + } + + /** + * Shorter form of {@link Matrices#functionalMatrixView(int, int, + * org.apache.mahout.math.function.IntIntFunction, boolean)}. + */ + public static Matrix functionalMatrixView(final int rows, + final int columns, + final IntIntFunction gf) { + return new FunctionalMatrixView(rows, columns, gf); + } + + /** + * A read-only transposed view of a matrix argument. + * + * @param m original matrix + * @return transposed view of original matrix + */ + public static Matrix transposedView(final Matrix m) { + + Preconditions.checkArgument(!(m instanceof SparseColumnMatrix)); + + if (m instanceof TransposedMatrixView) { + return ((TransposedMatrixView) m).getDelegate(); + } else { + return new TransposedMatrixView(m); + } + } + + /** + * Random Gaussian matrix view. + * + * @param seed generator seed + */ + public static Matrix gaussianView(final int rows, + final int columns, + long seed) { + return functionalMatrixView(rows, columns, gaussianGenerator(seed), true); + } + + + /** + * Matrix view based on uniform [-1,1) distribution. + * + * @param seed generator seed + */ + public static Matrix symmetricUniformView(final int rows, + final int columns, + int seed) { + return functionalMatrixView(rows, columns, uniformSymmetricGenerator(seed), true); + } + + /** + * Matrix view based on uniform [0,1) distribution. + * + * @param seed generator seed + */ + public static Matrix uniformView(final int rows, + final int columns, + int seed) { + return functionalMatrixView(rows, columns, uniformGenerator(seed), true); + } + + /** + * Generator for a matrix populated by random Gauissian values (Gaussian matrix view) + * + * @param seed The seed for the matrix. + * @return Gaussian {@link IntIntFunction} generating matrix view with normal values + */ + public static IntIntFunction gaussianGenerator(final long seed) { + final Random rnd = RandomUtils.getRandom(seed); + return new IntIntFunction() { + @Override + public double apply(int first, int second) { + rnd.setSeed(seed ^ (((long) first << 32) | (second & 0xffffffffL))); + return rnd.nextGaussian(); + } + }; + } + + private static final double UNIFORM_DIVISOR = Math.pow(2.0, 64); + + /** + * Uniform [-1,1) matrix generator function. + * <p> + * WARNING: to keep things performant, it is stateful and so not thread-safe. + * You'd need to create a copy per thread (with same seed) if shared between threads. + * + * @param seed - random seed initializer + * @return Uniform {@link IntIntFunction} generator + */ + public static IntIntFunction uniformSymmetricGenerator(final int seed) { + return new IntIntFunction() { + private byte[] data = new byte[8]; + + @Override + public double apply(int row, int column) { + long d = ((long) row << Integer.SIZE) | (column & 0xffffffffL); + for (int i = 0; i < 8; i++, d >>>= 8) data[i] = (byte) d; + long hash = MurmurHash.hash64A(data, seed); + return hash / UNIFORM_DIVISOR; + } + }; + } + + /** + * Uniform [0,1) matrix generator function + * + * @param seed generator seed + */ + public static IntIntFunction uniformGenerator(final int seed) { + return Functions.chain(new DoubleFunction() { + @Override + public double apply(double x) { + return (x + 1.0) / 2.0; + } + }, uniformSymmetricGenerator(seed)); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/Matrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/Matrix.java b/core/src/main/java/org/apache/mahout/math/Matrix.java new file mode 100644 index 0000000..57fab78 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/Matrix.java @@ -0,0 +1,413 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import org.apache.mahout.math.flavor.MatrixFlavor; +import org.apache.mahout.math.function.DoubleDoubleFunction; +import org.apache.mahout.math.function.DoubleFunction; +import org.apache.mahout.math.function.VectorFunction; + +import java.util.Map; + +/** The basic interface including numerous convenience functions */ +public interface Matrix extends Cloneable, VectorIterable { + + /** @return a formatted String suitable for output */ + String asFormatString(); + + /** + * Assign the value to all elements of the receiver + * + * @param value a double value + * @return the modified receiver + */ + Matrix assign(double value); + + /** + * Assign the values to the receiver + * + * @param values a double[] of values + * @return the modified receiver + * @throws CardinalityException if the cardinalities differ + */ + Matrix assign(double[][] values); + + /** + * Assign the other vector values to the receiver + * + * @param other a Matrix + * @return the modified receiver + * @throws CardinalityException if the cardinalities differ + */ + Matrix assign(Matrix other); + + /** + * Apply the function to each element of the receiver + * + * @param function a DoubleFunction to apply + * @return the modified receiver + */ + Matrix assign(DoubleFunction function); + + /** + * Apply the function to each element of the receiver and the corresponding element of the other argument + * + * @param other a Matrix containing the second arguments to the function + * @param function a DoubleDoubleFunction to apply + * @return the modified receiver + * @throws CardinalityException if the cardinalities differ + */ + Matrix assign(Matrix other, DoubleDoubleFunction function); + + /** + * Assign the other vector values to the column of the receiver + * + * @param column the int row to assign + * @param other a Vector + * @return the modified receiver + * @throws CardinalityException if the cardinalities differ + */ + Matrix assignColumn(int column, Vector other); + + /** + * Assign the other vector values to the row of the receiver + * + * @param row the int row to assign + * @param other a Vector + * @return the modified receiver + * @throws CardinalityException if the cardinalities differ + */ + Matrix assignRow(int row, Vector other); + + /** + * Collects the results of a function applied to each row of a matrix. + * @param f The function to be applied to each row. + * @return The vector of results. + */ + Vector aggregateRows(VectorFunction f); + + /** + * Collects the results of a function applied to each column of a matrix. + * @param f The function to be applied to each column. + * @return The vector of results. + */ + Vector aggregateColumns(VectorFunction f); + + /** + * Collects the results of a function applied to each element of a matrix and then + * aggregated. + * @param combiner A function that combines the results of the mapper. + * @param mapper A function to apply to each element. + * @return The result. + */ + double aggregate(DoubleDoubleFunction combiner, DoubleFunction mapper); + + /** + * @return The number of rows in the matrix. + */ + int columnSize(); + + /** + * @return Returns the number of rows in the matrix. + */ + int rowSize(); + + /** + * Return a copy of the recipient + * + * @return a new Matrix + */ + Matrix clone(); + + /** + * Returns matrix determinator using Laplace theorem + * + * @return a matrix determinator + */ + double determinant(); + + /** + * Return a new matrix containing the values of the recipient divided by the argument + * + * @param x a double value + * @return a new Matrix + */ + Matrix divide(double x); + + /** + * Return the value at the given indexes + * + * @param row an int row index + * @param column an int column index + * @return the double at the index + * @throws IndexException if the index is out of bounds + */ + double get(int row, int column); + + /** + * Return the value at the given indexes, without checking bounds + * + * @param row an int row index + * @param column an int column index + * @return the double at the index + */ + double getQuick(int row, int column); + + /** + * Return an empty matrix of the same underlying class as the receiver + * + * @return a Matrix + */ + Matrix like(); + + /** + * Returns an empty matrix of the same underlying class as the receiver and of the specified size. + * + * @param rows the int number of rows + * @param columns the int number of columns + */ + Matrix like(int rows, int columns); + + /** + * Return a new matrix containing the element by element difference of the recipient and the argument + * + * @param x a Matrix + * @return a new Matrix + * @throws CardinalityException if the cardinalities differ + */ + Matrix minus(Matrix x); + + /** + * Return a new matrix containing the sum of each value of the recipient and the argument + * + * @param x a double + * @return a new Matrix + */ + Matrix plus(double x); + + /** + * Return a new matrix containing the element by element sum of the recipient and the argument + * + * @param x a Matrix + * @return a new Matrix + * @throws CardinalityException if the cardinalities differ + */ + Matrix plus(Matrix x); + + /** + * Set the value at the given index + * + * @param row an int row index into the receiver + * @param column an int column index into the receiver + * @param value a double value to set + * @throws IndexException if the index is out of bounds + */ + void set(int row, int column, double value); + + void set(int row, double[] data); + + /** + * Set the value at the given index, without checking bounds + * + * @param row an int row index into the receiver + * @param column an int column index into the receiver + * @param value a double value to set + */ + void setQuick(int row, int column, double value); + + /** + * Return the number of values in the recipient + * + * @return an int[2] containing [row, column] count + */ + int[] getNumNondefaultElements(); + + /** + * Return a new matrix containing the product of each value of the recipient and the argument + * + * @param x a double argument + * @return a new Matrix + */ + Matrix times(double x); + + /** + * Return a new matrix containing the product of the recipient and the argument + * + * @param x a Matrix argument + * @return a new Matrix + * @throws CardinalityException if the cardinalities are incompatible + */ + Matrix times(Matrix x); + + /** + * Return a new matrix that is the transpose of the receiver + * + * @return the transpose + */ + Matrix transpose(); + + /** + * Return the sum of all the elements of the receiver + * + * @return a double + */ + double zSum(); + + /** + * Return a map of the current column label bindings of the receiver + * + * @return a {@code Map<String, Integer>} + */ + Map<String, Integer> getColumnLabelBindings(); + + /** + * Return a map of the current row label bindings of the receiver + * + * @return a {@code Map<String, Integer>} + */ + Map<String, Integer> getRowLabelBindings(); + + /** + * Sets a map of column label bindings in the receiver + * + * @param bindings a {@code Map<String, Integer>} of label bindings + */ + void setColumnLabelBindings(Map<String, Integer> bindings); + + /** + * Sets a map of row label bindings in the receiver + * + * @param bindings a {@code Map<String, Integer>} of label bindings + */ + void setRowLabelBindings(Map<String, Integer> bindings); + + /** + * Return the value at the given labels + * + * @param rowLabel a String row label + * @param columnLabel a String column label + * @return the double at the index + * + * @throws IndexException if the index is out of bounds + */ + double get(String rowLabel, String columnLabel); + + /** + * Set the value at the given index + * + * @param rowLabel a String row label + * @param columnLabel a String column label + * @param value a double value to set + * @throws IndexException if the index is out of bounds + */ + void set(String rowLabel, String columnLabel, double value); + + /** + * Set the value at the given index, updating the row and column label bindings + * + * @param rowLabel a String row label + * @param columnLabel a String column label + * @param row an int row index + * @param column an int column index + * @param value a double value + */ + void set(String rowLabel, String columnLabel, int row, int column, double value); + + /** + * Sets the row values at the given row label + * + * @param rowLabel a String row label + * @param rowData a double[] array of row data + */ + void set(String rowLabel, double[] rowData); + + /** + * Sets the row values at the given row index and updates the row labels + * + * @param rowLabel the String row label + * @param row an int the row index + * @param rowData a double[] array of row data + */ + void set(String rowLabel, int row, double[] rowData); + + /* + * Need stories for these but keeping them here for now. + * + */ + // void getNonZeros(IntArrayList jx, DoubleArrayList values); + // void foreachNonZero(IntDoubleFunction f); + // double aggregate(DoubleDoubleFunction aggregator, DoubleFunction map); + // double aggregate(Matrix other, DoubleDoubleFunction aggregator, + // DoubleDoubleFunction map); + // NewMatrix assign(Matrix y, DoubleDoubleFunction function, IntArrayList + // nonZeroIndexes); + + /** + * Return a view into part of a matrix. Changes to the view will change the + * original matrix. + * + * @param offset an int[2] offset into the receiver + * @param size the int[2] size of the desired result + * @return a matrix that shares storage with part of the original matrix. + * @throws CardinalityException if the length is greater than the cardinality of the receiver + * @throws IndexException if the offset is negative or the offset+length is outside of the receiver + */ + Matrix viewPart(int[] offset, int[] size); + + /** + * Return a view into part of a matrix. Changes to the view will change the + * original matrix. + * + * @param rowOffset The first row of the view + * @param rowsRequested The number of rows in the view + * @param columnOffset The first column in the view + * @param columnsRequested The number of columns in the view + * @return a matrix that shares storage with part of the original matrix. + * @throws CardinalityException if the length is greater than the cardinality of the receiver + * @throws IndexException if the offset is negative or the offset+length is outside of the + * receiver + */ + Matrix viewPart(int rowOffset, int rowsRequested, int columnOffset, int columnsRequested); + + /** + * Return a reference to a row. Changes to the view will change the original matrix. + * @param row The index of the row to return. + * @return A vector that shares storage with the original. + */ + Vector viewRow(int row); + + /** + * Return a reference to a column. Changes to the view will change the original matrix. + * @param column The index of the column to return. + * @return A vector that shares storage with the original. + */ + Vector viewColumn(int column); + + /** + * Returns a reference to the diagonal of a matrix. Changes to the view will change + * the original matrix. + * @return A vector that shares storage with the original matrix. + */ + Vector viewDiagonal(); + + /** + * Get matrix structural flavor (operations performance hints). This is optional operation, may + * throw {@link java.lang.UnsupportedOperationException}. + */ + MatrixFlavor getFlavor(); +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/MatrixSlice.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/MatrixSlice.java b/core/src/main/java/org/apache/mahout/math/MatrixSlice.java new file mode 100644 index 0000000..51378c1 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/MatrixSlice.java @@ -0,0 +1,36 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +public class MatrixSlice extends DelegatingVector { + private int index; + + public MatrixSlice(Vector v, int index) { + super(v); + this.index = index; + } + + public Vector vector() { + return getVector(); + } + + public int index() { + return index; + } +} + http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/MatrixTimesOps.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/MatrixTimesOps.java b/core/src/main/java/org/apache/mahout/math/MatrixTimesOps.java new file mode 100644 index 0000000..30d2afb --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/MatrixTimesOps.java @@ -0,0 +1,35 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +/** + * Optional interface for optimized matrix multiplications. + * Some concrete Matrix implementations may mix this in. + */ +public interface MatrixTimesOps { + /** + * computes matrix product of (this * that) + */ + Matrix timesRight(Matrix that); + + /** + * Computes matrix product of (that * this) + */ + Matrix timesLeft(Matrix that); + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/MatrixVectorView.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/MatrixVectorView.java b/core/src/main/java/org/apache/mahout/math/MatrixVectorView.java new file mode 100644 index 0000000..6ad44b5 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/MatrixVectorView.java @@ -0,0 +1,292 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +/** + * Provides a virtual vector that is really a row or column or diagonal of a matrix. + */ +public class MatrixVectorView extends AbstractVector { + private Matrix matrix; + private int row; + private int column; + private int rowStride; + private int columnStride; + private boolean isDense = true; + + public MatrixVectorView(Matrix matrix, int row, int column, int rowStride, int columnStride, boolean isDense) { + this(matrix, row, column, rowStride, columnStride); + this.isDense = isDense; + } + + public MatrixVectorView(Matrix matrix, int row, int column, int rowStride, int columnStride) { + super(viewSize(matrix, row, column, rowStride, columnStride)); + if (row < 0 || row >= matrix.rowSize()) { + throw new IndexException(row, matrix.rowSize()); + } + if (column < 0 || column >= matrix.columnSize()) { + throw new IndexException(column, matrix.columnSize()); + } + + this.matrix = matrix; + this.row = row; + this.column = column; + this.rowStride = rowStride; + this.columnStride = columnStride; + } + + private static int viewSize(Matrix matrix, int row, int column, int rowStride, int columnStride) { + if (rowStride != 0 && columnStride != 0) { + int n1 = (matrix.numRows() - row) / rowStride; + int n2 = (matrix.numCols() - column) / columnStride; + return Math.min(n1, n2); + } else if (rowStride > 0) { + return (matrix.numRows() - row) / rowStride; + } else { + return (matrix.numCols() - column) / columnStride; + } + } + + /** + * @return true iff the {@link Vector} implementation should be considered + * dense -- that it explicitly represents every value + */ + @Override + public boolean isDense() { + return isDense; + } + + /** + * @return true iff {@link Vector} should be considered to be iterable in + * index order in an efficient way. In particular this implies that {@link #iterator()} and + * {@link #iterateNonZero()} return elements in ascending order by index. + */ + @Override + public boolean isSequentialAccess() { + return true; + } + + /** + * Iterates over all elements <p> + * NOTE: Implementations may choose to reuse the Element returned + * for performance reasons, so if you need a copy of it, you should call {@link #getElement(int)} for + * the given index + * + * @return An {@link java.util.Iterator} over all elements + */ + @Override + public Iterator<Element> iterator() { + final LocalElement r = new LocalElement(0); + return new Iterator<Element>() { + private int i; + + @Override + public boolean hasNext() { + return i < size(); + } + + @Override + public Element next() { + if (i >= size()) { + throw new NoSuchElementException(); + } + r.index = i++; + return r; + } + + @Override + public void remove() { + throw new UnsupportedOperationException("Can't remove from a view"); + } + }; + } + + /** + * Iterates over all non-zero elements. <p> + * NOTE: Implementations may choose to reuse the Element + * returned for performance reasons, so if you need a copy of it, you should call {@link + * #getElement(int)} for the given index + * + * @return An {@link java.util.Iterator} over all non-zero elements + */ + @Override + public Iterator<Element> iterateNonZero() { + + return new Iterator<Element>() { + class NonZeroElement implements Element { + int index; + + @Override + public double get() { + return getQuick(index); + } + + @Override + public int index() { + return index; + } + + @Override + public void set(double value) { + invalidateCachedLength(); + setQuick(index, value); + } + } + + private final NonZeroElement element = new NonZeroElement(); + private int index = -1; + private int lookAheadIndex = -1; + + @Override + public boolean hasNext() { + if (lookAheadIndex == index) { // User calls hasNext() after a next() + lookAhead(); + } // else user called hasNext() repeatedly. + return lookAheadIndex < size(); + } + + private void lookAhead() { + lookAheadIndex++; + while (lookAheadIndex < size() && getQuick(lookAheadIndex) == 0.0) { + lookAheadIndex++; + } + } + + @Override + public Element next() { + if (lookAheadIndex == index) { // If user called next() without checking hasNext(). + lookAhead(); + } + + index = lookAheadIndex; + + if (index >= size()) { // If the end is reached. + throw new NoSuchElementException(); + } + + element.index = index; + return element; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + }; + } + + /** + * Return the value at the given index, without checking bounds + * + * @param index an int index + * @return the double at the index + */ + @Override + public double getQuick(int index) { + return matrix.getQuick(row + rowStride * index, column + columnStride * index); + } + + /** + * Return an empty vector of the same underlying class as the receiver + * + * @return a Vector + */ + @Override + public Vector like() { + return matrix.like(size(), 1).viewColumn(0); + } + + @Override + public Vector like(int cardinality) { + return matrix.like(cardinality, 1).viewColumn(0); + } + + /** + * Set the value at the given index, without checking bounds + * + * @param index an int index into the receiver + * @param value a double value to set + */ + @Override + public void setQuick(int index, double value) { + matrix.setQuick(row + rowStride * index, column + columnStride * index, value); + } + + /** + * Return the number of values in the recipient + * + * @return an int + */ + @Override + public int getNumNondefaultElements() { + return size(); + } + + @Override + public double getLookupCost() { + // TODO: what is a genuine value here? + return 1; + } + + @Override + public double getIteratorAdvanceCost() { + // TODO: what is a genuine value here? + return 1; + } + + @Override + public boolean isAddConstantTime() { + // TODO: what is a genuine value here? + return true; + } + + @Override + protected Matrix matrixLike(int rows, int columns) { + return matrix.like(rows, columns); + } + + @Override + public Vector clone() { + MatrixVectorView r = (MatrixVectorView) super.clone(); + r.matrix = matrix.clone(); + r.row = row; + r.column = column; + r.rowStride = rowStride; + r.columnStride = columnStride; + return r; + } + + /** + * Used internally by assign() to update multiple indices and values at once. + * Only really useful for sparse vectors (especially SequentialAccessSparseVector). + * <p> + * If someone ever adds a new type of sparse vectors, this method must merge (index, value) pairs into the vector. + * + * @param updates a mapping of indices to values to merge in the vector. + */ + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + int[] indices = updates.getIndices(); + double[] values = updates.getValues(); + for (int i = 0; i < updates.getNumMappings(); ++i) { + matrix.setQuick(row + rowStride * indices[i], column + columnStride * indices[i], values[i]); + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/MatrixView.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/MatrixView.java b/core/src/main/java/org/apache/mahout/math/MatrixView.java new file mode 100644 index 0000000..951515b --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/MatrixView.java @@ -0,0 +1,160 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import org.apache.mahout.math.flavor.MatrixFlavor; + +/** Implements subset view of a Matrix */ +public class MatrixView extends AbstractMatrix { + + private Matrix matrix; + + // the offset into the Matrix + private int[] offset; + + /** + * Construct a view of the matrix with given offset and cardinality + * + * @param matrix an underlying Matrix + * @param offset the int[2] offset into the underlying matrix + * @param size the int[2] size of the view + */ + public MatrixView(Matrix matrix, int[] offset, int[] size) { + super(size[ROW], size[COL]); + int rowOffset = offset[ROW]; + if (rowOffset < 0) { + throw new IndexException(rowOffset, rowSize()); + } + + int rowsRequested = size[ROW]; + if (rowOffset + rowsRequested > matrix.rowSize()) { + throw new IndexException(rowOffset + rowsRequested, matrix.rowSize()); + } + + int columnOffset = offset[COL]; + if (columnOffset < 0) { + throw new IndexException(columnOffset, columnSize()); + } + + int columnsRequested = size[COL]; + if (columnOffset + columnsRequested > matrix.columnSize()) { + throw new IndexException(columnOffset + columnsRequested, matrix.columnSize()); + } + this.matrix = matrix; + this.offset = offset; + } + + @Override + public Matrix clone() { + MatrixView clone = (MatrixView) super.clone(); + clone.matrix = matrix.clone(); + clone.offset = offset.clone(); + return clone; + } + + @Override + public double getQuick(int row, int column) { + return matrix.getQuick(offset[ROW] + row, offset[COL] + column); + } + + @Override + public Matrix like() { + return matrix.like(rowSize(), columnSize()); + } + + @Override + public Matrix like(int rows, int columns) { + return matrix.like(rows, columns); + } + + @Override + public void setQuick(int row, int column, double value) { + matrix.setQuick(offset[ROW] + row, offset[COL] + column, value); + } + + @Override + public int[] getNumNondefaultElements() { + return new int[]{rowSize(), columnSize()}; + + } + + @Override + public Matrix viewPart(int[] offset, int[] size) { + if (offset[ROW] < 0) { + throw new IndexException(offset[ROW], 0); + } + if (offset[ROW] + size[ROW] > rowSize()) { + throw new IndexException(offset[ROW] + size[ROW], rowSize()); + } + if (offset[COL] < 0) { + throw new IndexException(offset[COL], 0); + } + if (offset[COL] + size[COL] > columnSize()) { + throw new IndexException(offset[COL] + size[COL], columnSize()); + } + int[] origin = this.offset.clone(); + origin[ROW] += offset[ROW]; + origin[COL] += offset[COL]; + return new MatrixView(matrix, origin, size); + } + + @Override + public Matrix assignColumn(int column, Vector other) { + if (rowSize() != other.size()) { + throw new CardinalityException(rowSize(), other.size()); + } + for (int row = 0; row < rowSize(); row++) { + matrix.setQuick(row + offset[ROW], column + offset[COL], other + .getQuick(row)); + } + return this; + } + + @Override + public Matrix assignRow(int row, Vector other) { + if (columnSize() != other.size()) { + throw new CardinalityException(columnSize(), other.size()); + } + for (int col = 0; col < columnSize(); col++) { + matrix + .setQuick(row + offset[ROW], col + offset[COL], other.getQuick(col)); + } + return this; + } + + @Override + public Vector viewColumn(int column) { + if (column < 0 || column >= columnSize()) { + throw new IndexException(column, columnSize()); + } + return matrix.viewColumn(column + offset[COL]).viewPart(offset[ROW], rowSize()); + } + + @Override + public Vector viewRow(int row) { + if (row < 0 || row >= rowSize()) { + throw new IndexException(row, rowSize()); + } + return matrix.viewRow(row + offset[ROW]).viewPart(offset[COL], columnSize()); + } + + @Override + public MatrixFlavor getFlavor() { + return matrix.getFlavor(); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/MurmurHash.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/MurmurHash.java b/core/src/main/java/org/apache/mahout/math/MurmurHash.java new file mode 100644 index 0000000..13f3a07 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/MurmurHash.java @@ -0,0 +1,158 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import com.google.common.primitives.Ints; + +import java.nio.ByteBuffer; +import java.nio.ByteOrder; + +/** + * <p>This is a very fast, non-cryptographic hash suitable for general hash-based + * lookup. See http://murmurhash.googlepages.com/ for more details. + * </p> + * <p>The C version of MurmurHash 2.0 found at that site was ported + * to Java by Andrzej Bialecki (ab at getopt org).</p> + */ +public final class MurmurHash { + + private MurmurHash() {} + + /** + * Hashes an int. + * @param data The int to hash. + * @param seed The seed for the hash. + * @return The 32 bit hash of the bytes in question. + */ + public static int hash(int data, int seed) { + return hash(ByteBuffer.wrap(Ints.toByteArray(data)), seed); + } + + /** + * Hashes bytes in an array. + * @param data The bytes to hash. + * @param seed The seed for the hash. + * @return The 32 bit hash of the bytes in question. + */ + public static int hash(byte[] data, int seed) { + return hash(ByteBuffer.wrap(data), seed); + } + + /** + * Hashes bytes in part of an array. + * @param data The data to hash. + * @param offset Where to start munging. + * @param length How many bytes to process. + * @param seed The seed to start with. + * @return The 32-bit hash of the data in question. + */ + public static int hash(byte[] data, int offset, int length, int seed) { + return hash(ByteBuffer.wrap(data, offset, length), seed); + } + + /** + * Hashes the bytes in a buffer from the current position to the limit. + * @param buf The bytes to hash. + * @param seed The seed for the hash. + * @return The 32 bit murmur hash of the bytes in the buffer. + */ + public static int hash(ByteBuffer buf, int seed) { + // save byte order for later restoration + ByteOrder byteOrder = buf.order(); + buf.order(ByteOrder.LITTLE_ENDIAN); + + int m = 0x5bd1e995; + int r = 24; + + int h = seed ^ buf.remaining(); + + while (buf.remaining() >= 4) { + int k = buf.getInt(); + + k *= m; + k ^= k >>> r; + k *= m; + + h *= m; + h ^= k; + } + + if (buf.remaining() > 0) { + ByteBuffer finish = ByteBuffer.allocate(4).order(ByteOrder.LITTLE_ENDIAN); + // for big-endian version, use this first: + // finish.position(4-buf.remaining()); + finish.put(buf).rewind(); + h ^= finish.getInt(); + h *= m; + } + + h ^= h >>> 13; + h *= m; + h ^= h >>> 15; + + buf.order(byteOrder); + return h; + } + + + public static long hash64A(byte[] data, int seed) { + return hash64A(ByteBuffer.wrap(data), seed); + } + + public static long hash64A(byte[] data, int offset, int length, int seed) { + return hash64A(ByteBuffer.wrap(data, offset, length), seed); + } + + public static long hash64A(ByteBuffer buf, int seed) { + ByteOrder byteOrder = buf.order(); + buf.order(ByteOrder.LITTLE_ENDIAN); + + long m = 0xc6a4a7935bd1e995L; + int r = 47; + + long h = seed ^ (buf.remaining() * m); + + while (buf.remaining() >= 8) { + long k = buf.getLong(); + + k *= m; + k ^= k >>> r; + k *= m; + + h ^= k; + h *= m; + } + + if (buf.remaining() > 0) { + ByteBuffer finish = ByteBuffer.allocate(8).order(ByteOrder.LITTLE_ENDIAN); + // for big-endian version, do this first: + // finish.position(8-buf.remaining()); + finish.put(buf).rewind(); + h ^= finish.getLong(); + h *= m; + } + + h ^= h >>> r; + h *= m; + h ^= h >>> r; + + buf.order(byteOrder); + return h; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/MurmurHash3.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/MurmurHash3.java b/core/src/main/java/org/apache/mahout/math/MurmurHash3.java new file mode 100644 index 0000000..bd0bb6b --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/MurmurHash3.java @@ -0,0 +1,84 @@ +/* + * This code is public domain. + * + * The MurmurHash3 algorithm was created by Austin Appleby and put into the public domain. + * See http://code.google.com/p/smhasher/ + * + * This java port was authored by + * Yonik Seeley and was placed into the public domain per + * https://github.com/yonik/java_util/blob/master/src/util/hash/MurmurHash3.java. + */ + +package org.apache.mahout.math; + +/** + * <p> + * This produces exactly the same hash values as the final C+ + + * version of MurmurHash3 and is thus suitable for producing the same hash values across + * platforms. + * <p> + * The 32 bit x86 version of this hash should be the fastest variant for relatively short keys like ids. + * <p> + * Note - The x86 and x64 versions do _not_ produce the same results, as the + * algorithms are optimized for their respective platforms. + * <p> + * See also http://github.com/yonik/java_util for future updates to this file. + */ +public final class MurmurHash3 { + + private MurmurHash3() {} + + /** Returns the MurmurHash3_x86_32 hash. */ + public static int murmurhash3x8632(byte[] data, int offset, int len, int seed) { + + int c1 = 0xcc9e2d51; + int c2 = 0x1b873593; + + int h1 = seed; + int roundedEnd = offset + (len & 0xfffffffc); // round down to 4 byte block + + for (int i = offset; i < roundedEnd; i += 4) { + // little endian load order + int k1 = (data[i] & 0xff) | ((data[i + 1] & 0xff) << 8) | ((data[i + 2] & 0xff) << 16) | (data[i + 3] << 24); + k1 *= c1; + k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15); + k1 *= c2; + + h1 ^= k1; + h1 = (h1 << 13) | (h1 >>> 19); // ROTL32(h1,13); + h1 = h1 * 5 + 0xe6546b64; + } + + // tail + int k1 = 0; + + switch(len & 0x03) { + case 3: + k1 = (data[roundedEnd + 2] & 0xff) << 16; + // fallthrough + case 2: + k1 |= (data[roundedEnd + 1] & 0xff) << 8; + // fallthrough + case 1: + k1 |= data[roundedEnd] & 0xff; + k1 *= c1; + k1 = (k1 << 15) | (k1 >>> 17); // ROTL32(k1,15); + k1 *= c2; + h1 ^= k1; + default: + } + + // finalization + h1 ^= len; + + // fmix(h1); + h1 ^= h1 >>> 16; + h1 *= 0x85ebca6b; + h1 ^= h1 >>> 13; + h1 *= 0xc2b2ae35; + h1 ^= h1 >>> 16; + + return h1; + } + +}
