http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/NamedVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/NamedVector.java b/core/src/main/java/org/apache/mahout/math/NamedVector.java new file mode 100644 index 0000000..d4fa609 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/NamedVector.java @@ -0,0 +1,328 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import org.apache.mahout.math.function.DoubleDoubleFunction; +import org.apache.mahout.math.function.DoubleFunction; + +public class NamedVector implements Vector { + + private Vector delegate; + private String name; + + public NamedVector() { + } + + public NamedVector(NamedVector other) { + this.delegate = other.getDelegate(); + this.name = other.getName(); + } + + public NamedVector(Vector delegate, String name) { + if (delegate == null || name == null) { + throw new IllegalArgumentException(); + } + this.delegate = delegate; + this.name = name; + } + + public String getName() { + return name; + } + + public Vector getDelegate() { + return delegate; + } + + @Override + public int hashCode() { + return delegate.hashCode(); + } + + /** + * To not break transitivity with other {@link Vector}s, this does not compare name. + */ + @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") + @Override + public boolean equals(Object other) { + return delegate.equals(other); + } + + @SuppressWarnings("CloneDoesntCallSuperClone") + @Override + public NamedVector clone() { + return new NamedVector(delegate.clone(), name); + } + + @Override + public Iterable<Element> all() { + return delegate.all(); + } + + @Override + public Iterable<Element> nonZeroes() { + return delegate.nonZeroes(); + } + + @Override + public String asFormatString() { + return toString(); + } + + @Override + public String toString() { + StringBuilder bldr = new StringBuilder(); + bldr.append(name).append(':').append(delegate.toString()); + return bldr.toString(); + } + + @Override + public Vector assign(double value) { + return delegate.assign(value); + } + + @Override + public Vector assign(double[] values) { + return delegate.assign(values); + } + + @Override + public Vector assign(Vector other) { + return delegate.assign(other); + } + + @Override + public Vector assign(DoubleFunction function) { + return delegate.assign(function); + } + + @Override + public Vector assign(Vector other, DoubleDoubleFunction function) { + return delegate.assign(other, function); + } + + @Override + public Vector assign(DoubleDoubleFunction f, double y) { + return delegate.assign(f, y); + } + + @Override + public int size() { + return delegate.size(); + } + + @Override + public boolean isDense() { + return delegate.isDense(); + } + + @Override + public boolean isSequentialAccess() { + return delegate.isSequentialAccess(); + } + + @Override + public Element getElement(int index) { + return delegate.getElement(index); + } + + /** + * Merge a set of (index, value) pairs into the vector. + * + * @param updates an ordered mapping of indices to values to be merged in. + */ + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + delegate.mergeUpdates(updates); + } + + @Override + public Vector divide(double x) { + return delegate.divide(x); + } + + @Override + public double dot(Vector x) { + return delegate.dot(x); + } + + @Override + public double get(int index) { + return delegate.get(index); + } + + @Override + public double getQuick(int index) { + return delegate.getQuick(index); + } + + @Override + public NamedVector like() { + return new NamedVector(delegate.like(), name); + } + + @Override + public Vector like(int cardinality) { + return new NamedVector(delegate.like(cardinality), name); + } + + @Override + public Vector minus(Vector x) { + return delegate.minus(x); + } + + @Override + public Vector normalize() { + return delegate.normalize(); + } + + @Override + public Vector normalize(double power) { + return delegate.normalize(power); + } + + @Override + public Vector logNormalize() { + return delegate.logNormalize(); + } + + @Override + public Vector logNormalize(double power) { + return delegate.logNormalize(power); + } + + @Override + public double norm(double power) { + return delegate.norm(power); + } + + @Override + public double maxValue() { + return delegate.maxValue(); + } + + @Override + public int maxValueIndex() { + return delegate.maxValueIndex(); + } + + @Override + public double minValue() { + return delegate.minValue(); + } + + @Override + public int minValueIndex() { + return delegate.minValueIndex(); + } + + @Override + public Vector plus(double x) { + return delegate.plus(x); + } + + @Override + public Vector plus(Vector x) { + return delegate.plus(x); + } + + @Override + public void set(int index, double value) { + delegate.set(index, value); + } + + @Override + public void setQuick(int index, double value) { + delegate.setQuick(index, value); + } + + @Override + public void incrementQuick(int index, double increment) { + delegate.incrementQuick(index, increment); + } + + @Override + public int getNumNonZeroElements() { + return delegate.getNumNonZeroElements(); + } + + @Override + public int getNumNondefaultElements() { + return delegate.getNumNondefaultElements(); + } + + @Override + public Vector times(double x) { + return delegate.times(x); + } + + @Override + public Vector times(Vector x) { + return delegate.times(x); + } + + @Override + public Vector viewPart(int offset, int length) { + return delegate.viewPart(offset, length); + } + + @Override + public double zSum() { + return delegate.zSum(); + } + + @Override + public Matrix cross(Vector other) { + return delegate.cross(other); + } + + @Override + public double aggregate(DoubleDoubleFunction aggregator, DoubleFunction map) { + return delegate.aggregate(aggregator, map); + } + + @Override + public double aggregate(Vector other, DoubleDoubleFunction aggregator, DoubleDoubleFunction combiner) { + return delegate.aggregate(other, aggregator, combiner); + } + + @Override + public double getLengthSquared() { + return delegate.getLengthSquared(); + } + + @Override + public double getDistanceSquared(Vector v) { + return delegate.getDistanceSquared(v); + } + + @Override + public double getLookupCost() { + return delegate.getLookupCost(); + } + + @Override + public double getIteratorAdvanceCost() { + return delegate.getIteratorAdvanceCost(); + } + + @Override + public boolean isAddConstantTime() { + return delegate.isAddConstantTime(); + } +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/OldQRDecomposition.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/OldQRDecomposition.java b/core/src/main/java/org/apache/mahout/math/OldQRDecomposition.java new file mode 100644 index 0000000..e1552e4 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/OldQRDecomposition.java @@ -0,0 +1,234 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright 1999 CERN - European Organization for Nuclear Research. + * Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose + * is hereby granted without fee, provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear in supporting documentation. + * CERN makes no representations about the suitability of this software for any purpose. + * It is provided "as is" without expressed or implied warranty. + */ +package org.apache.mahout.math; + +import org.apache.mahout.math.function.Functions; + +import java.util.Locale; + + +/** + For an <tt>m x n</tt> matrix <tt>A</tt> with <tt>m >= n</tt>, the QR decomposition is an <tt>m x n</tt> + orthogonal matrix <tt>Q</tt> and an <tt>n x n</tt> upper triangular matrix <tt>R</tt> so that + <tt>A = Q*R</tt>. + <P> + The QR decompostion always exists, even if the matrix does not have + full rank, so the constructor will never fail. The primary use of the + QR decomposition is in the least squares solution of nonsquare systems + of simultaneous linear equations. This will fail if <tt>isFullRank()</tt> + returns <tt>false</tt>. + */ + +/** partially deprecated until unit tests are in place. Until this time, this class/interface is unsupported. */ +public class OldQRDecomposition implements QR { + + /** Array for internal storage of decomposition. */ + private final Matrix qr; + + /** Row and column dimensions. */ + private final int originalRows; + private final int originalColumns; + + /** Array for internal storage of diagonal of R. */ + private final Vector rDiag; + + /** + * Constructs and returns a new QR decomposition object; computed by Householder reflections; The decomposed matrices + * can be retrieved via instance methods of the returned decomposition object. + * + * @param a A rectangular matrix. + * @throws IllegalArgumentException if {@code A.rows() < A.columns()} + */ + + public OldQRDecomposition(Matrix a) { + + // Initialize. + qr = a.clone(); + originalRows = a.numRows(); + originalColumns = a.numCols(); + rDiag = new DenseVector(originalColumns); + + // precompute and cache some views to avoid regenerating them time and again + Vector[] QRcolumnsPart = new Vector[originalColumns]; + for (int k = 0; k < originalColumns; k++) { + QRcolumnsPart[k] = qr.viewColumn(k).viewPart(k, originalRows - k); + } + + // Main loop. + for (int k = 0; k < originalColumns; k++) { + //DoubleMatrix1D QRcolk = QR.viewColumn(k).viewPart(k,m-k); + // Compute 2-norm of k-th column without under/overflow. + double nrm = 0; + //if (k<m) nrm = QRcolumnsPart[k].aggregate(hypot,F.identity); + + for (int i = k; i < originalRows; i++) { // fixes bug reported by [email protected] + nrm = Algebra.hypot(nrm, qr.getQuick(i, k)); + } + + + if (nrm != 0.0) { + // Form k-th Householder vector. + if (qr.getQuick(k, k) < 0) { + nrm = -nrm; + } + QRcolumnsPart[k].assign(Functions.div(nrm)); + /* + for (int i = k; i < m; i++) { + QR[i][k] /= nrm; + } + */ + + qr.setQuick(k, k, qr.getQuick(k, k) + 1); + + // Apply transformation to remaining columns. + for (int j = k + 1; j < originalColumns; j++) { + Vector QRcolj = qr.viewColumn(j).viewPart(k, originalRows - k); + double s = QRcolumnsPart[k].dot(QRcolj); + /* + // fixes bug reported by John Chambers + DoubleMatrix1D QRcolj = QR.viewColumn(j).viewPart(k,m-k); + double s = QRcolumnsPart[k].zDotProduct(QRcolumns[j]); + double s = 0.0; + for (int i = k; i < m; i++) { + s += QR[i][k]*QR[i][j]; + } + */ + s = -s / qr.getQuick(k, k); + //QRcolumnsPart[j].assign(QRcolumns[k], F.plusMult(s)); + + for (int i = k; i < originalRows; i++) { + qr.setQuick(i, j, qr.getQuick(i, j) + s * qr.getQuick(i, k)); + } + + } + } + rDiag.setQuick(k, -nrm); + } + } + + /** + * Generates and returns the (economy-sized) orthogonal factor <tt>Q</tt>. + * + * @return <tt>Q</tt> + */ + @Override + public Matrix getQ() { + int columns = Math.min(originalColumns, originalRows); + Matrix q = qr.like(originalRows, columns); + for (int k = columns - 1; k >= 0; k--) { + Vector QRcolk = qr.viewColumn(k).viewPart(k, originalRows - k); + q.set(k, k, 1); + for (int j = k; j < columns; j++) { + if (qr.get(k, k) != 0) { + Vector Qcolj = q.viewColumn(j).viewPart(k, originalRows - k); + double s = -QRcolk.dot(Qcolj) / qr.get(k, k); + Qcolj.assign(QRcolk, Functions.plusMult(s)); + } + } + } + return q; + } + + /** + * Returns the upper triangular factor, <tt>R</tt>. + * + * @return <tt>R</tt> + */ + @Override + public Matrix getR() { + int rows = Math.min(originalRows, originalColumns); + Matrix r = qr.like(rows, originalColumns); + for (int i = 0; i < rows; i++) { + for (int j = 0; j < originalColumns; j++) { + if (i < j) { + r.setQuick(i, j, qr.getQuick(i, j)); + } else if (i == j) { + r.setQuick(i, j, rDiag.getQuick(i)); + } else { + r.setQuick(i, j, 0); + } + } + } + return r; + } + + /** + * Returns whether the matrix <tt>A</tt> has full rank. + * + * @return true if <tt>R</tt>, and hence <tt>A</tt>, has full rank. + */ + @Override + public boolean hasFullRank() { + for (int j = 0; j < originalColumns; j++) { + if (rDiag.getQuick(j) == 0) { + return false; + } + } + return true; + } + + /** + * Least squares solution of <tt>A*X = B</tt>; <tt>returns X</tt>. + * + * @param B A matrix with as many rows as <tt>A</tt> and any number of columns. + * @return <tt>X</tt> that minimizes the two norm of <tt>Q*R*X - B</tt>. + * @throws IllegalArgumentException if <tt>B.rows() != A.rows()</tt>. + */ + @Override + public Matrix solve(Matrix B) { + if (B.numRows() != originalRows) { + throw new IllegalArgumentException("Matrix row dimensions must agree."); + } + + int columns = B.numCols(); + Matrix x = B.like(originalColumns, columns); + + // this can all be done a bit more efficiently if we don't actually + // form explicit versions of Q^T and R but this code isn't soo bad + // and it is much easier to understand + Matrix qt = getQ().transpose(); + Matrix y = qt.times(B); + + Matrix r = getR(); + for (int k = Math.min(originalColumns, originalRows) - 1; k >= 0; k--) { + // X[k,] = Y[k,] / R[k,k], note that X[k,] starts with 0 so += is same as = + x.viewRow(k).assign(y.viewRow(k), Functions.plusMult(1 / r.get(k, k))); + + // Y[0:(k-1),] -= R[0:(k-1),k] * X[k,] + Vector rColumn = r.viewColumn(k).viewPart(0, k); + for (int c = 0; c < columns; c++) { + y.viewColumn(c).viewPart(0, k).assign(rColumn, Functions.plusMult(-x.get(k, c))); + } + } + return x; + } + + /** + * Returns a rough string rendition of a QR. + */ + @Override + public String toString() { + return String.format(Locale.ENGLISH, "QR(%d,%d,fullRank=%s)", originalColumns, originalRows, hasFullRank()); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java b/core/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java new file mode 100644 index 0000000..7c6ad11 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/OrderedIntDoubleMapping.java @@ -0,0 +1,265 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.io.Serializable; + +public final class OrderedIntDoubleMapping implements Serializable, Cloneable { + + static final double DEFAULT_VALUE = 0.0; + + private int[] indices; + private double[] values; + private int numMappings; + + // If true, doesn't allow DEFAULT_VALUEs in the mapping (adding a zero discards it). Otherwise, a DEFAULT_VALUE is + // treated like any other value. + private boolean noDefault = true; + + OrderedIntDoubleMapping(boolean noDefault) { + this(); + this.noDefault = noDefault; + } + + OrderedIntDoubleMapping() { + // no-arg constructor for deserializer + this(11); + } + + OrderedIntDoubleMapping(int capacity) { + indices = new int[capacity]; + values = new double[capacity]; + numMappings = 0; + } + + OrderedIntDoubleMapping(int[] indices, double[] values, int numMappings) { + this.indices = indices; + this.values = values; + this.numMappings = numMappings; + } + + public int[] getIndices() { + return indices; + } + + public int indexAt(int offset) { + return indices[offset]; + } + + public void setIndexAt(int offset, int index) { + indices[offset] = index; + } + + public double[] getValues() { + return values; + } + + public void setValueAt(int offset, double value) { + values[offset] = value; + } + + + public int getNumMappings() { + return numMappings; + } + + private void growTo(int newCapacity) { + if (newCapacity > indices.length) { + int[] newIndices = new int[newCapacity]; + System.arraycopy(indices, 0, newIndices, 0, numMappings); + indices = newIndices; + double[] newValues = new double[newCapacity]; + System.arraycopy(values, 0, newValues, 0, numMappings); + values = newValues; + } + } + + private int find(int index) { + int low = 0; + int high = numMappings - 1; + while (low <= high) { + int mid = low + (high - low >>> 1); + int midVal = indices[mid]; + if (midVal < index) { + low = mid + 1; + } else if (midVal > index) { + high = mid - 1; + } else { + return mid; + } + } + return -(low + 1); + } + + public double get(int index) { + int offset = find(index); + return offset >= 0 ? values[offset] : DEFAULT_VALUE; + } + + public void set(int index, double value) { + if (numMappings == 0 || index > indices[numMappings - 1]) { + if (!noDefault || value != DEFAULT_VALUE) { + if (numMappings >= indices.length) { + growTo(Math.max((int) (1.2 * numMappings), numMappings + 1)); + } + indices[numMappings] = index; + values[numMappings] = value; + ++numMappings; + } + } else { + int offset = find(index); + if (offset >= 0) { + insertOrUpdateValueIfPresent(offset, value); + } else { + insertValueIfNotDefault(index, offset, value); + } + } + } + + /** + * Merges the updates in linear time by allocating new arrays and iterating through the existing indices and values + * and the updates' indices and values at the same time while selecting the minimum index to set at each step. + * @param updates another list of mappings to be merged in. + */ + public void merge(OrderedIntDoubleMapping updates) { + int[] updateIndices = updates.getIndices(); + double[] updateValues = updates.getValues(); + + int newNumMappings = numMappings + updates.getNumMappings(); + int newCapacity = Math.max((int) (1.2 * newNumMappings), newNumMappings + 1); + int[] newIndices = new int[newCapacity]; + double[] newValues = new double[newCapacity]; + + int k = 0; + int i = 0, j = 0; + for (; i < numMappings && j < updates.getNumMappings(); ++k) { + if (indices[i] < updateIndices[j]) { + newIndices[k] = indices[i]; + newValues[k] = values[i]; + ++i; + } else if (indices[i] > updateIndices[j]) { + newIndices[k] = updateIndices[j]; + newValues[k] = updateValues[j]; + ++j; + } else { + newIndices[k] = updateIndices[j]; + newValues[k] = updateValues[j]; + ++i; + ++j; + } + } + + for (; i < numMappings; ++i, ++k) { + newIndices[k] = indices[i]; + newValues[k] = values[i]; + } + for (; j < updates.getNumMappings(); ++j, ++k) { + newIndices[k] = updateIndices[j]; + newValues[k] = updateValues[j]; + } + + indices = newIndices; + values = newValues; + numMappings = k; + } + + @Override + public int hashCode() { + int result = 0; + for (int i = 0; i < numMappings; i++) { + result = 31 * result + indices[i]; + result = 31 * result + (int) Double.doubleToRawLongBits(values[i]); + } + return result; + } + + @Override + public boolean equals(Object o) { + if (o instanceof OrderedIntDoubleMapping) { + OrderedIntDoubleMapping other = (OrderedIntDoubleMapping) o; + if (numMappings == other.numMappings) { + for (int i = 0; i < numMappings; i++) { + if (indices[i] != other.indices[i] || values[i] != other.values[i]) { + return false; + } + } + return true; + } + } + return false; + } + + @Override + public String toString() { + StringBuilder result = new StringBuilder(10 * numMappings); + for (int i = 0; i < numMappings; i++) { + result.append('('); + result.append(indices[i]); + result.append(','); + result.append(values[i]); + result.append(')'); + } + return result.toString(); + } + + @SuppressWarnings("CloneDoesntCallSuperClone") + @Override + public OrderedIntDoubleMapping clone() { + return new OrderedIntDoubleMapping(indices.clone(), values.clone(), numMappings); + } + + public void increment(int index, double increment) { + int offset = find(index); + if (offset >= 0) { + double newValue = values[offset] + increment; + insertOrUpdateValueIfPresent(offset, newValue); + } else { + insertValueIfNotDefault(index, offset, increment); + } + } + + private void insertValueIfNotDefault(int index, int offset, double value) { + if (!noDefault || value != DEFAULT_VALUE) { + if (numMappings >= indices.length) { + growTo(Math.max((int) (1.2 * numMappings), numMappings + 1)); + } + int at = -offset - 1; + if (numMappings > at) { + for (int i = numMappings - 1, j = numMappings; i >= at; i--, j--) { + indices[j] = indices[i]; + values[j] = values[i]; + } + } + indices[at] = index; + values[at] = value; + numMappings++; + } + } + + private void insertOrUpdateValueIfPresent(int offset, double newValue) { + if (noDefault && newValue == DEFAULT_VALUE) { + for (int i = offset + 1, j = offset; i < numMappings; i++, j++) { + indices[j] = indices[i]; + values[j] = values[i]; + } + numMappings--; + } else { + values[offset] = newValue; + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java b/core/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java new file mode 100644 index 0000000..e8dd2b1 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/OrthonormalityVerifier.java @@ -0,0 +1,46 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import com.google.common.collect.Lists; + +import java.util.List; + +public final class OrthonormalityVerifier { + + private OrthonormalityVerifier() { + } + + public static VectorIterable pairwiseInnerProducts(Iterable<MatrixSlice> basis) { + DenseMatrix out = null; + for (MatrixSlice slice1 : basis) { + List<Double> dots = Lists.newArrayList(); + for (MatrixSlice slice2 : basis) { + dots.add(slice1.vector().dot(slice2.vector())); + } + if (out == null) { + out = new DenseMatrix(dots.size(), dots.size()); + } + for (int i = 0; i < dots.size(); i++) { + out.set(slice1.index(), i, dots.get(i)); + } + } + return out; + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/PermutedVectorView.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/PermutedVectorView.java b/core/src/main/java/org/apache/mahout/math/PermutedVectorView.java new file mode 100644 index 0000000..e46f326 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/PermutedVectorView.java @@ -0,0 +1,250 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.util.Iterator; + +import com.google.common.collect.AbstractIterator; + +/** + * Provides a permuted view of a vector. + */ +public class PermutedVectorView extends AbstractVector { + private final Vector vector; // the vector containing the data + private final int[] pivot; // convert from external index to internal + private final int[] unpivot; // convert from internal index to external + + public PermutedVectorView(Vector vector, int[] pivot, int[] unpivot) { + super(vector.size()); + this.vector = vector; + this.pivot = pivot; + this.unpivot = unpivot; + } + + public PermutedVectorView(Vector vector, int[] pivot) { + this(vector, pivot, reversePivotPermutation(pivot)); + } + + private static int[] reversePivotPermutation(int[] pivot) { + int[] unpivot1 = new int[pivot.length]; + for (int i = 0; i < pivot.length; i++) { + unpivot1[pivot[i]] = i; + } + return unpivot1; + } + + /** + * Subclasses must override to return an appropriately sparse or dense result + * + * @param rows the row cardinality + * @param columns the column cardinality + * @return a Matrix + */ + @Override + protected Matrix matrixLike(int rows, int columns) { + if (vector.isDense()) { + return new DenseMatrix(rows, columns); + } else { + return new SparseRowMatrix(rows, columns); + } + } + + /** + * Used internally by assign() to update multiple indices and values at once. + * Only really useful for sparse vectors (especially SequentialAccessSparseVector). + * <p> + * If someone ever adds a new type of sparse vectors, this method must merge (index, value) pairs into the vector. + * + * @param updates a mapping of indices to values to merge in the vector. + */ + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + for (int i = 0; i < updates.getNumMappings(); ++i) { + updates.setIndexAt(i, pivot[updates.indexAt(i)]); + } + vector.mergeUpdates(updates); + } + + /** + * @return true iff this implementation should be considered dense -- that it explicitly + * represents every value + */ + @Override + public boolean isDense() { + return vector.isDense(); + } + + /** + * If the view is permuted, the elements cannot be accessed in the same order. + * + * @return true iff this implementation should be considered to be iterable in index order in an + * efficient way. In particular this implies that {@link #iterator()} and {@link + * #iterateNonZero()} return elements in ascending order by index. + */ + @Override + public boolean isSequentialAccess() { + return false; + } + + /** + * Iterates over all elements <p> * NOTE: Implementations may choose to reuse the Element + * returned for performance reasons, so if you need a copy of it, you should call {@link + * #getElement(int)} for the given index + * + * @return An {@link java.util.Iterator} over all elements + */ + @Override + public Iterator<Element> iterator() { + return new AbstractIterator<Element>() { + private final Iterator<Element> i = vector.all().iterator(); + + @Override + protected Vector.Element computeNext() { + if (i.hasNext()) { + final Element x = i.next(); + return new Element() { + private final int index = unpivot[x.index()]; + + @Override + public double get() { + return x.get(); + } + + @Override + public int index() { + return index; + } + + @Override + public void set(double value) { + x.set(value); + } + }; + } else { + return endOfData(); + } + } + }; + } + + /** + * Iterates over all non-zero elements. <p> NOTE: Implementations may choose to reuse the Element + * returned for performance reasons, so if you need a copy of it, you should call {@link + * #getElement(int)} for the given index + * + * @return An {@link java.util.Iterator} over all non-zero elements + */ + @Override + public Iterator<Element> iterateNonZero() { + return new AbstractIterator<Element>() { + private final Iterator<Element> i = vector.nonZeroes().iterator(); + + @Override + protected Vector.Element computeNext() { + if (i.hasNext()) { + final Element x = i.next(); + return new Element() { + private final int index = unpivot[x.index()]; + + @Override + public double get() { + return x.get(); + } + + @Override + public int index() { + return index; + } + + @Override + public void set(double value) { + x.set(value); + } + }; + } else { + return endOfData(); + } + } + }; + } + + /** + * Return the value at the given index, without checking bounds + * + * @param index an int index + * @return the double at the index + */ + @Override + public double getQuick(int index) { + return vector.getQuick(pivot[index]); + } + + /** + * Return an empty vector of the same underlying class as the receiver + * + * @return a Vector + */ + @Override + public Vector like() { + return vector.like(); + } + + @Override + public Vector like(int cardinality) { + return vector.like(cardinality); + } + + /** + * Set the value at the given index, without checking bounds + * + * @param index an int index into the receiver + * @param value a double value to set + */ + @Override + public void setQuick(int index, double value) { + vector.setQuick(pivot[index], value); + } + + /** Return the number of values in the recipient */ + @Override + public int getNumNondefaultElements() { + return vector.getNumNondefaultElements(); + } + + @Override + public int getNumNonZeroElements() { + // Return the number of nonzeros in the recipient, + // so potentially don't have to go through our iterator + return vector.getNumNonZeroElements(); + } + + @Override + public double getLookupCost() { + return vector.getLookupCost(); + } + + @Override + public double getIteratorAdvanceCost() { + return vector.getIteratorAdvanceCost(); + } + + @Override + public boolean isAddConstantTime() { + return vector.isAddConstantTime(); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/PersistentObject.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/PersistentObject.java b/core/src/main/java/org/apache/mahout/math/PersistentObject.java new file mode 100644 index 0000000..f1d4293 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/PersistentObject.java @@ -0,0 +1,58 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math; + +/** + * This empty class is the common root for all persistent capable classes. + * If this class inherits from <tt>java.lang.Object</tt> then all subclasses are serializable with + * the standard Java serialization mechanism. + * If this class inherits from <tt>com.objy.db.app.ooObj</tt> then all subclasses are + * <i>additionally</i> serializable with the Objectivity ODBMS persistance mechanism. + * Thus, by modifying the inheritance of this class the entire tree of subclasses can + * be switched to Objectivity compatibility (and back) with minimum effort. + */ +public abstract class PersistentObject implements java.io.Serializable, Cloneable { + + /** Not yet commented. */ + protected PersistentObject() { + } + + /** + * Returns a copy of the receiver. This default implementation does not nothing except making the otherwise + * <tt>protected</tt> clone method <tt>public</tt>. + * + * @return a copy of the receiver. + */ + @Override + public Object clone() { + try { + return super.clone(); + } catch (CloneNotSupportedException exc) { + throw new InternalError(); //should never happen since we are cloneable + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/PivotedMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/PivotedMatrix.java b/core/src/main/java/org/apache/mahout/math/PivotedMatrix.java new file mode 100644 index 0000000..fba1e98 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/PivotedMatrix.java @@ -0,0 +1,288 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import com.google.common.base.Preconditions; + +/** + * Matrix that allows transparent row and column permutation. + */ +public class PivotedMatrix extends AbstractMatrix { + + private Matrix base; + private int[] rowPivot; + private int[] rowUnpivot; + private int[] columnPivot; + private int[] columnUnpivot; + + public PivotedMatrix(Matrix base, int[] pivot) { + this(base, pivot, java.util.Arrays.copyOf(pivot, pivot.length)); + } + public PivotedMatrix(Matrix base, int[] rowPivot, int[] columnPivot) { + super(base.rowSize(), base.columnSize()); + + this.base = base; + this.rowPivot = rowPivot; + rowUnpivot = invert(rowPivot); + + this.columnPivot = columnPivot; + columnUnpivot = invert(columnPivot); + } + + public PivotedMatrix(Matrix base) { + this(base, identityPivot(base.rowSize()),identityPivot(base.columnSize())); + } + + /** + * Swaps indexes i and j. This does both row and column permutation. + * + * @param i First index to swap. + * @param j Second index to swap. + */ + public void swap(int i, int j) { + swapRows(i, j); + swapColumns(i, j); + } + + /** + * Swaps indexes i and j. This does just row permutation. + * + * @param i First index to swap. + * @param j Second index to swap. + */ + public void swapRows(int i, int j) { + swap(rowPivot, rowUnpivot, i, j); + } + + + /** + * Swaps indexes i and j. This does just row permutation. + * + * @param i First index to swap. + * @param j Second index to swap. + */ + public void swapColumns(int i, int j) { + swap(columnPivot, columnUnpivot, i, j); + } + + private static void swap(int[] pivot, int[] unpivot, int i, int j) { + Preconditions.checkPositionIndex(i, pivot.length); + Preconditions.checkPositionIndex(j, pivot.length); + if (i != j) { + int tmp = pivot[i]; + pivot[i] = pivot[j]; + pivot[j] = tmp; + + unpivot[pivot[i]] = i; + unpivot[pivot[j]] = j; + } + } + + /** + * Assign the other vector values to the column of the receiver + * + * @param column the int row to assign + * @param other a Vector + * @return the modified receiver + * @throws org.apache.mahout.math.CardinalityException + * if the cardinalities differ + */ + @Override + public Matrix assignColumn(int column, Vector other) { + // note the reversed pivoting for other + return base.assignColumn(columnPivot[column], new PermutedVectorView(other, rowUnpivot, rowPivot)); + } + + /** + * Assign the other vector values to the row of the receiver + * + * @param row the int row to assign + * @param other a Vector + * @return the modified receiver + * @throws org.apache.mahout.math.CardinalityException + * if the cardinalities differ + */ + @Override + public Matrix assignRow(int row, Vector other) { + // note the reversed pivoting for other + return base.assignRow(rowPivot[row], new PermutedVectorView(other, columnUnpivot, columnPivot)); + } + + /** + * Return the column at the given index + * + * @param column an int column index + * @return a Vector at the index + * @throws org.apache.mahout.math.IndexException + * if the index is out of bounds + */ + @Override + public Vector viewColumn(int column) { + if (column < 0 || column >= columnSize()) { + throw new IndexException(column, columnSize()); + } + return new PermutedVectorView(base.viewColumn(columnPivot[column]), rowPivot, rowUnpivot); + } + + /** + * Return the row at the given index + * + * @param row an int row index + * @return a Vector at the index + * @throws org.apache.mahout.math.IndexException + * if the index is out of bounds + */ + @Override + public Vector viewRow(int row) { + if (row < 0 || row >= rowSize()) { + throw new IndexException(row, rowSize()); + } + return new PermutedVectorView(base.viewRow(rowPivot[row]), columnPivot, columnUnpivot); + } + + /** + * Return the value at the given indexes, without checking bounds + * + * @param row an int row index + * @param column an int column index + * @return the double at the index + */ + @Override + public double getQuick(int row, int column) { + return base.getQuick(rowPivot[row], columnPivot[column]); + } + + /** + * Return an empty matrix of the same underlying class as the receiver + * + * @return a Matrix + */ + @Override + public Matrix like() { + return new PivotedMatrix(base.like()); + } + + + @Override + public Matrix clone() { + PivotedMatrix clone = (PivotedMatrix) super.clone(); + + base = base.clone(); + rowPivot = rowPivot.clone(); + rowUnpivot = rowUnpivot.clone(); + columnPivot = columnPivot.clone(); + columnUnpivot = columnUnpivot.clone(); + + return clone; + } + + + /** + * Returns an empty matrix of the same underlying class as the receiver and of the specified + * size. + * + * @param rows the int number of rows + * @param columns the int number of columns + */ + @Override + public Matrix like(int rows, int columns) { + return new PivotedMatrix(base.like(rows, columns)); + } + + /** + * Set the value at the given index, without checking bounds + * + * @param row an int row index into the receiver + * @param column an int column index into the receiver + * @param value a double value to set + */ + @Override + public void setQuick(int row, int column, double value) { + base.setQuick(rowPivot[row], columnPivot[column], value); + } + + /** + * Return the number of values in the recipient + * + * @return an int[2] containing [row, column] count + */ + @Override + public int[] getNumNondefaultElements() { + return base.getNumNondefaultElements(); + } + + /** + * Return a new matrix containing the subset of the recipient + * + * @param offset an int[2] offset into the receiver + * @param size the int[2] size of the desired result + * @return a new Matrix that is a view of the original + * @throws org.apache.mahout.math.CardinalityException + * if the length is greater than the cardinality of the receiver + * @throws org.apache.mahout.math.IndexException + * if the offset is negative or the offset+length is outside of the receiver + */ + @Override + public Matrix viewPart(int[] offset, int[] size) { + return new MatrixView(this, offset, size); + } + + public int rowUnpivot(int k) { + return rowUnpivot[k]; + } + + public int columnUnpivot(int k) { + return columnUnpivot[k]; + } + + public int[] getRowPivot() { + return rowPivot; + } + + public int[] getInverseRowPivot() { + return rowUnpivot; + } + + public int[] getColumnPivot() { + return columnPivot; + } + + public int[] getInverseColumnPivot() { + return columnUnpivot; + } + + public Matrix getBase() { + return base; + } + + private static int[] identityPivot(int n) { + int[] pivot = new int[n]; + for (int i = 0; i < n; i++) { + pivot[i] = i; + } + return pivot; + } + + private static int[] invert(int[] pivot) { + int[] x = new int[pivot.length]; + for (int i = 0; i < pivot.length; i++) { + x[pivot[i]] = i; + } + return x; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/QR.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/QR.java b/core/src/main/java/org/apache/mahout/math/QR.java new file mode 100644 index 0000000..5992224 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/QR.java @@ -0,0 +1,27 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License.package org.apache.mahout.math; + */ +package org.apache.mahout.math; + +public interface QR { + Matrix getQ(); + + Matrix getR(); + + boolean hasFullRank(); + + Matrix solve(Matrix B); +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/QRDecomposition.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/QRDecomposition.java b/core/src/main/java/org/apache/mahout/math/QRDecomposition.java new file mode 100644 index 0000000..ab5b3d2 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/QRDecomposition.java @@ -0,0 +1,181 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * + * Copyright 1999 CERN - European Organization for Nuclear Research. + * Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose + * is hereby granted without fee, provided that the above copyright notice appear in all copies and + * that both that copyright notice and this permission notice appear in supporting documentation. + * CERN makes no representations about the suitability of this software for any purpose. + * It is provided "as is" without expressed or implied warranty. + */ +package org.apache.mahout.math; + +import org.apache.mahout.math.function.Functions; + +import java.util.Locale; + +/** + For an <tt>m x n</tt> matrix <tt>A</tt> with {@code m >= n}, the QR decomposition is an <tt>m x n</tt> + orthogonal matrix <tt>Q</tt> and an <tt>n x n</tt> upper triangular matrix <tt>R</tt> so that + <tt>A = Q*R</tt>. + <P> + The QR decomposition always exists, even if the matrix does not have + full rank, so the constructor will never fail. The primary use of the + QR decomposition is in the least squares solution of non-square systems + of simultaneous linear equations. This will fail if <tt>isFullRank()</tt> + returns <tt>false</tt>. + */ + +public class QRDecomposition implements QR { + private final Matrix q; + private final Matrix r; + private final Matrix mType; + private final boolean fullRank; + private final int rows; + private final int columns; + + /** + * Constructs and returns a new QR decomposition object; computed by Householder reflections; The + * decomposed matrices can be retrieved via instance methods of the returned decomposition + * object. + * + * @param a A rectangular matrix. + * @throws IllegalArgumentException if {@code A.rows() < A.columns()}. + */ + public QRDecomposition(Matrix a) { + + rows = a.rowSize(); + int min = Math.min(a.rowSize(), a.columnSize()); + columns = a.columnSize(); + mType = a.like(1,1); + + Matrix qTmp = a.clone(); + + boolean fullRank = true; + + r = new DenseMatrix(min, columns); + + for (int i = 0; i < min; i++) { + Vector qi = qTmp.viewColumn(i); + double alpha = qi.norm(2); + if (Math.abs(alpha) > Double.MIN_VALUE) { + qi.assign(Functions.div(alpha)); + } else { + if (Double.isInfinite(alpha) || Double.isNaN(alpha)) { + throw new ArithmeticException("Invalid intermediate result"); + } + fullRank = false; + } + r.set(i, i, alpha); + + for (int j = i + 1; j < columns; j++) { + Vector qj = qTmp.viewColumn(j); + double norm = qj.norm(2); + if (Math.abs(norm) > Double.MIN_VALUE) { + double beta = qi.dot(qj); + r.set(i, j, beta); + if (j < min) { + qj.assign(qi, Functions.plusMult(-beta)); + } + } else { + if (Double.isInfinite(norm) || Double.isNaN(norm)) { + throw new ArithmeticException("Invalid intermediate result"); + } + } + } + } + if (columns > min) { + q = qTmp.viewPart(0, rows, 0, min).clone(); + } else { + q = qTmp; + } + this.fullRank = fullRank; + } + + /** + * Generates and returns the (economy-sized) orthogonal factor <tt>Q</tt>. + * + * @return <tt>Q</tt> + */ + @Override + public Matrix getQ() { + return q; + } + + /** + * Returns the upper triangular factor, <tt>R</tt>. + * + * @return <tt>R</tt> + */ + @Override + public Matrix getR() { + return r; + } + + /** + * Returns whether the matrix <tt>A</tt> has full rank. + * + * @return true if <tt>R</tt>, and hence <tt>A</tt>, has full rank. + */ + @Override + public boolean hasFullRank() { + return fullRank; + } + + /** + * Least squares solution of <tt>A*X = B</tt>; <tt>returns X</tt>. + * + * @param B A matrix with as many rows as <tt>A</tt> and any number of columns. + * @return <tt>X</tt> that minimizes the two norm of <tt>Q*R*X - B</tt>. + * @throws IllegalArgumentException if <tt>B.rows() != A.rows()</tt>. + */ + @Override + public Matrix solve(Matrix B) { + if (B.numRows() != rows) { + throw new IllegalArgumentException("Matrix row dimensions must agree."); + } + + int cols = B.numCols(); + Matrix x = mType.like(columns, cols); + + // this can all be done a bit more efficiently if we don't actually + // form explicit versions of Q^T and R but this code isn't so bad + // and it is much easier to understand + Matrix qt = getQ().transpose(); + Matrix y = qt.times(B); + + Matrix r = getR(); + for (int k = Math.min(columns, rows) - 1; k >= 0; k--) { + // X[k,] = Y[k,] / R[k,k], note that X[k,] starts with 0 so += is same as = + x.viewRow(k).assign(y.viewRow(k), Functions.plusMult(1 / r.get(k, k))); + + // Y[0:(k-1),] -= R[0:(k-1),k] * X[k,] + Vector rColumn = r.viewColumn(k).viewPart(0, k); + for (int c = 0; c < cols; c++) { + y.viewColumn(c).viewPart(0, k).assign(rColumn, Functions.plusMult(-x.get(k, c))); + } + } + return x; + } + + /** + * Returns a rough string rendition of a QR. + */ + @Override + public String toString() { + return String.format(Locale.ENGLISH, "QR(%d x %d,fullRank=%s)", rows, columns, hasFullRank()); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java b/core/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java new file mode 100644 index 0000000..c325078 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/RandomAccessSparseVector.java @@ -0,0 +1,303 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import it.unimi.dsi.fastutil.doubles.DoubleIterator; +import it.unimi.dsi.fastutil.ints.Int2DoubleMap; +import it.unimi.dsi.fastutil.ints.Int2DoubleMap.Entry; +import it.unimi.dsi.fastutil.ints.Int2DoubleOpenHashMap; +import it.unimi.dsi.fastutil.objects.ObjectIterator; + +import java.util.Iterator; +import java.util.NoSuchElementException; + +import org.apache.mahout.math.set.AbstractSet; + +/** Implements vector that only stores non-zero doubles */ +public class RandomAccessSparseVector extends AbstractVector { + + private static final int INITIAL_CAPACITY = 11; + + private Int2DoubleOpenHashMap values; + + /** For serialization purposes only. */ + public RandomAccessSparseVector() { + super(0); + } + + public RandomAccessSparseVector(int cardinality) { + this(cardinality, Math.min(cardinality, INITIAL_CAPACITY)); // arbitrary estimate of 'sparseness' + } + + public RandomAccessSparseVector(int cardinality, int initialCapacity) { + super(cardinality); + values = new Int2DoubleOpenHashMap(initialCapacity, .5f); + } + + public RandomAccessSparseVector(Vector other) { + this(other.size(), other.getNumNondefaultElements()); + for (Element e : other.nonZeroes()) { + values.put(e.index(), e.get()); + } + } + + private RandomAccessSparseVector(int cardinality, Int2DoubleOpenHashMap values) { + super(cardinality); + this.values = values; + } + + public RandomAccessSparseVector(RandomAccessSparseVector other, boolean shallowCopy) { + super(other.size()); + values = shallowCopy ? other.values : other.values.clone(); + } + + @Override + protected Matrix matrixLike(int rows, int columns) { + return new SparseMatrix(rows, columns); + } + + @Override + public RandomAccessSparseVector clone() { + return new RandomAccessSparseVector(size(), values.clone()); + } + + @Override + public String toString() { + return sparseVectorToString(); + } + + @Override + public Vector assign(Vector other) { + if (size() != other.size()) { + throw new CardinalityException(size(), other.size()); + } + values.clear(); + for (Element e : other.nonZeroes()) { + setQuick(e.index(), e.get()); + } + return this; + } + + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + for (int i = 0; i < updates.getNumMappings(); ++i) { + values.put(updates.getIndices()[i], updates.getValues()[i]); + } + } + + /** + * @return false + */ + @Override + public boolean isDense() { + return false; + } + + /** + * @return false + */ + @Override + public boolean isSequentialAccess() { + return false; + } + + @Override + public double getQuick(int index) { + return values.get(index); + } + + @Override + public void setQuick(int index, double value) { + invalidateCachedLength(); + if (value == 0.0) { + values.remove(index); + } else { + values.put(index, value); + } + } + + @Override + public void incrementQuick(int index, double increment) { + invalidateCachedLength(); + values.addTo( index, increment); + } + + + @Override + public RandomAccessSparseVector like() { + return new RandomAccessSparseVector(size(), values.size()); + } + + @Override + public Vector like(int cardinality) { + return new RandomAccessSparseVector(cardinality, values.size()); + } + + @Override + public int getNumNondefaultElements() { + return values.size(); + } + + @Override + public int getNumNonZeroElements() { + final DoubleIterator iterator = values.values().iterator(); + int numNonZeros = 0; + for( int i = values.size(); i-- != 0; ) if ( iterator.nextDouble() != 0 ) numNonZeros++; + return numNonZeros; + } + + @Override + public double getLookupCost() { + return 1; + } + + @Override + public double getIteratorAdvanceCost() { + return 1 + (AbstractSet.DEFAULT_MAX_LOAD_FACTOR + AbstractSet.DEFAULT_MIN_LOAD_FACTOR) / 2; + } + + /** + * This is "sort of" constant, but really it might resize the array. + */ + @Override + public boolean isAddConstantTime() { + return true; + } + + /* + @Override + public Element getElement(int index) { + // TODO: this should return a MapElement so as to avoid hashing for both getQuick and setQuick. + return super.getElement(index); + } + */ + + private final class NonZeroIterator implements Iterator<Element> { + final ObjectIterator<Int2DoubleMap.Entry> fastIterator = values.int2DoubleEntrySet().fastIterator(); + final RandomAccessElement element = new RandomAccessElement( fastIterator ); + + @Override + public boolean hasNext() { + return fastIterator.hasNext(); + } + + @Override + public Element next() { + if ( ! hasNext() ) throw new NoSuchElementException(); + element.entry = fastIterator.next(); + return element; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + final class RandomAccessElement implements Element { + Int2DoubleMap.Entry entry; + final ObjectIterator<Int2DoubleMap.Entry> fastIterator; + + public RandomAccessElement( ObjectIterator<Entry> fastIterator ) { + super(); + this.fastIterator = fastIterator; + } + + @Override + public double get() { + return entry.getDoubleValue(); + } + + @Override + public int index() { + return entry.getIntKey(); + } + + @Override + public void set( double value ) { + invalidateCachedLength(); + if (value == 0.0) fastIterator.remove(); + else entry.setValue( value ); + } + } + /** + * NOTE: this implementation reuses the Vector.Element instance for each call of next(). If you need to preserve the + * instance, you need to make a copy of it + * + * @return an {@link Iterator} over the Elements. + * @see #getElement(int) + */ + @Override + public Iterator<Element> iterateNonZero() { + return new NonZeroIterator(); + } + + @Override + public Iterator<Element> iterator() { + return new AllIterator(); + } + + final class GeneralElement implements Element { + int index; + double value; + + @Override + public double get() { + return value; + } + + @Override + public int index() { + return index; + } + + @Override + public void set( double value ) { + invalidateCachedLength(); + if (value == 0.0) values.remove( index ); + else values.put( index, value ); + } +} + + private final class AllIterator implements Iterator<Element> { + private final GeneralElement element = new GeneralElement(); + + private AllIterator() { + element.index = -1; + } + + @Override + public boolean hasNext() { + return element.index + 1 < size(); + } + + @Override + public Element next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + element.value = values.get( ++element.index ); + return element; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/RandomTrinaryMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/RandomTrinaryMatrix.java b/core/src/main/java/org/apache/mahout/math/RandomTrinaryMatrix.java new file mode 100644 index 0000000..85de0cd --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/RandomTrinaryMatrix.java @@ -0,0 +1,146 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.nio.ByteBuffer; +import java.util.concurrent.atomic.AtomicInteger; + +/** + * Random matrix. Each value is taken from {-1,0,1} with roughly equal probability. Note + * that by default, the value is determined by a relatively simple hash of the coordinates. + * Such a hash is not usable where real randomness is required, but suffices nicely for + * random projection methods. + * + * If the simple hash method is not satisfactory, an optional high quality mode is available + * which uses a murmur hash of the coordinates. + */ +public class RandomTrinaryMatrix extends AbstractMatrix { + private static final AtomicInteger ID = new AtomicInteger(); + private static final int PRIME1 = 104047; + private static final int PRIME2 = 101377; + private static final int PRIME3 = 64661; + private static final long SCALE = 1L << 32; + + private final int seed; + + // set this to true to use a high quality hash + private boolean highQuality = false; + + public RandomTrinaryMatrix(int seed, int rows, int columns, boolean highQuality) { + super(rows, columns); + + this.highQuality = highQuality; + this.seed = seed; + } + + public RandomTrinaryMatrix(int rows, int columns) { + this(ID.incrementAndGet(), rows, columns, false); + } + + @Override + public Matrix assignColumn(int column, Vector other) { + throw new UnsupportedOperationException("Can't assign to read-only matrix"); + } + + @Override + public Matrix assignRow(int row, Vector other) { + throw new UnsupportedOperationException("Can't assign to read-only matrix"); + } + + /** + * Return the value at the given indexes, without checking bounds + * + * @param row an int row index + * @param column an int column index + * @return the double at the index + */ + @Override + public double getQuick(int row, int column) { + if (highQuality) { + ByteBuffer buf = ByteBuffer.allocate(8); + buf.putInt(row); + buf.putInt(column); + buf.flip(); + return (MurmurHash.hash64A(buf, seed) & (SCALE - 1)) / (double) SCALE; + } else { + // this isn't a fantastic random number generator, but it is just fine for random projections + return ((((row * PRIME1) + column * PRIME2 + row * column * PRIME3) & 8) * 0.25) - 1; + } + } + + + /** + * Return an empty matrix of the same underlying class as the receiver + * + * @return a Matrix + */ + @Override + public Matrix like() { + return new DenseMatrix(rowSize(), columnSize()); + } + + /** + * Returns an empty matrix of the same underlying class as the receiver and of the specified + * size. + * + * @param rows the int number of rows + * @param columns the int number of columns + */ + @Override + public Matrix like(int rows, int columns) { + return new DenseMatrix(rows, columns); + } + + /** + * Set the value at the given index, without checking bounds + * + * @param row an int row index into the receiver + * @param column an int column index into the receiver + * @param value a double value to set + */ + @Override + public void setQuick(int row, int column, double value) { + throw new UnsupportedOperationException("Can't assign to read-only matrix"); + } + + /** + * Return the number of values in the recipient + * + * @return an int[2] containing [row, column] count + */ + @Override + public int[] getNumNondefaultElements() { + throw new UnsupportedOperationException("Can't assign to read-only matrix"); + } + + /** + * Return a new matrix containing the subset of the recipient + * + * @param offset an int[2] offset into the receiver + * @param size the int[2] size of the desired result + * @return a new Matrix that is a view of the original + * @throws org.apache.mahout.math.CardinalityException + * if the length is greater than the cardinality of the receiver + * @throws org.apache.mahout.math.IndexException + * if the offset is negative or the offset+length is outside of the receiver + */ + @Override + public Matrix viewPart(int[] offset, int[] size) { + return new MatrixView(this, offset, size); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java b/core/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java new file mode 100644 index 0000000..f7d67a7 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/SequentialAccessSparseVector.java @@ -0,0 +1,379 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import com.google.common.primitives.Doubles; +import org.apache.mahout.math.function.Functions; + +/** + * <p> + * Implements vector that only stores non-zero doubles as a pair of parallel arrays (OrderedIntDoubleMapping), + * one int[], one double[]. If there are <b>k</b> non-zero elements in the vector, this implementation has + * O(log(k)) random-access read performance, and O(k) random-access write performance, which is far below that + * of the hashmap based {@link org.apache.mahout.math.RandomAccessSparseVector RandomAccessSparseVector}. This + * class is primarily used for operations where the all the elements will be accessed in a read-only fashion + * sequentially: methods which operate not via get() or set(), but via iterateNonZero(), such as (but not limited + * to) :</p> + * <ul> + * <li>dot(Vector)</li> + * <li>addTo(Vector)</li> + * </ul> + * + * See {@link OrderedIntDoubleMapping} + */ +public class SequentialAccessSparseVector extends AbstractVector { + + private OrderedIntDoubleMapping values; + + /** For serialization purposes only. */ + public SequentialAccessSparseVector() { + super(0); + } + + public SequentialAccessSparseVector(int cardinality) { + this(cardinality, Math.min(100, cardinality / 1000 < 10 ? 10 : cardinality / 1000)); // arbitrary estimate of + // 'sparseness' + } + + public SequentialAccessSparseVector(int cardinality, int size) { + super(cardinality); + values = new OrderedIntDoubleMapping(size); + } + + public SequentialAccessSparseVector(Vector other) { + this(other.size(), other.getNumNondefaultElements()); + + if (other.isSequentialAccess()) { + for (Element e : other.nonZeroes()) { + set(e.index(), e.get()); + } + } else { + // If the incoming Vector to copy is random, then adding items + // from the Iterator can degrade performance dramatically if + // the number of elements is large as this Vector tries to stay + // in order as items are added, so it's better to sort the other + // Vector's elements by index and then add them to this + copySortedRandomAccessSparseVector(other); + } + } + + // Sorts a RandomAccessSparseVectors Elements before adding them to this + private int copySortedRandomAccessSparseVector(Vector other) { + int elementCount = other.getNumNondefaultElements(); + OrderedElement[] sortableElements = new OrderedElement[elementCount]; + int s = 0; + for (Element e : other.nonZeroes()) { + sortableElements[s++] = new OrderedElement(e.index(), e.get()); + } + Arrays.sort(sortableElements); + for (int i = 0; i < sortableElements.length; i++) { + values.setIndexAt(i, sortableElements[i].index); + values.setValueAt(i, sortableElements[i].value); + } + values = new OrderedIntDoubleMapping(values.getIndices(), values.getValues(), elementCount); + return elementCount; + } + + public SequentialAccessSparseVector(SequentialAccessSparseVector other, boolean shallowCopy) { + super(other.size()); + values = shallowCopy ? other.values : other.values.clone(); + } + + public SequentialAccessSparseVector(SequentialAccessSparseVector other) { + this(other.size(), other.getNumNondefaultElements()); + values = other.values.clone(); + } + + private SequentialAccessSparseVector(int cardinality, OrderedIntDoubleMapping values) { + super(cardinality); + this.values = values; + } + + @Override + protected Matrix matrixLike(int rows, int columns) { + //return new SparseRowMatrix(rows, columns); + return new SparseMatrix(rows, columns); + } + + @SuppressWarnings("CloneDoesntCallSuperClone") + @Override + public SequentialAccessSparseVector clone() { + return new SequentialAccessSparseVector(size(), values.clone()); + } + + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + values.merge(updates); + } + + @Override + public String toString() { + return sparseVectorToString(); + } + + /** + * @return false + */ + @Override + public boolean isDense() { + return false; + } + + /** + * @return true + */ + @Override + public boolean isSequentialAccess() { + return true; + } + + /** + * Warning! This takes O(log n) time as it does a binary search behind the scenes! + * Only use it when STRICTLY necessary. + * @param index an int index. + * @return the value at that position in the vector. + */ + @Override + public double getQuick(int index) { + return values.get(index); + } + + /** + * Warning! This takes O(log n) time as it does a binary search behind the scenes! + * Only use it when STRICTLY necessary. + * @param index an int index. + */ + @Override + public void setQuick(int index, double value) { + invalidateCachedLength(); + values.set(index, value); + } + + @Override + public void incrementQuick(int index, double increment) { + invalidateCachedLength(); + values.increment(index, increment); + } + + @Override + public SequentialAccessSparseVector like() { + return new SequentialAccessSparseVector(size(), values.getNumMappings()); + } + + @Override + public Vector like(int cardinality) { + return new SequentialAccessSparseVector(cardinality); + } + + @Override + public int getNumNondefaultElements() { + return values.getNumMappings(); + } + + @Override + public int getNumNonZeroElements() { + double[] elementValues = values.getValues(); + int numMappedElements = values.getNumMappings(); + int numNonZeros = 0; + for (int index = 0; index < numMappedElements; index++) { + if (elementValues[index] != 0) { + numNonZeros++; + } + } + return numNonZeros; + } + + @Override + public double getLookupCost() { + return Math.max(1, Math.round(Functions.LOG2.apply(getNumNondefaultElements()))); + } + + @Override + public double getIteratorAdvanceCost() { + return 1; + } + + @Override + public boolean isAddConstantTime() { + return false; + } + + @Override + public Iterator<Element> iterateNonZero() { + + // TODO: this is a bug, since nonDefaultIterator doesn't hold to non-zero contract. + return new NonDefaultIterator(); + } + + @Override + public Iterator<Element> iterator() { + return new AllIterator(); + } + + private final class NonDefaultIterator implements Iterator<Element> { + private final NonDefaultElement element = new NonDefaultElement(); + + @Override + public boolean hasNext() { + return element.getNextOffset() < values.getNumMappings(); + } + + @Override + public Element next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + element.advanceOffset(); + return element; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private final class AllIterator implements Iterator<Element> { + private final AllElement element = new AllElement(); + + @Override + public boolean hasNext() { + return element.getNextIndex() < SequentialAccessSparseVector.this.size(); + } + + @Override + public Element next() { + if (!hasNext()) { + throw new NoSuchElementException(); + } + + element.advanceIndex(); + return element; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private final class NonDefaultElement implements Element { + private int offset = -1; + + void advanceOffset() { + offset++; + } + + int getNextOffset() { + return offset + 1; + } + + @Override + public double get() { + return values.getValues()[offset]; + } + + @Override + public int index() { + return values.getIndices()[offset]; + } + + @Override + public void set(double value) { + invalidateCachedLength(); + values.setValueAt(offset, value); + } + } + + private final class AllElement implements Element { + private int index = -1; + private int nextOffset; + + void advanceIndex() { + index++; + if (nextOffset < values.getNumMappings() && index > values.getIndices()[nextOffset]) { + nextOffset++; + } + } + + int getNextIndex() { + return index + 1; + } + + @Override + public double get() { + if (nextOffset < values.getNumMappings() && index == values.getIndices()[nextOffset]) { + return values.getValues()[nextOffset]; + } else { + return OrderedIntDoubleMapping.DEFAULT_VALUE; + } + } + + @Override + public int index() { + return index; + } + + @Override + public void set(double value) { + invalidateCachedLength(); + if (nextOffset < values.getNumMappings() && index == values.indexAt(nextOffset)) { + values.setValueAt(nextOffset, value); + } else { + // Yes, this works; the offset into indices of the new value's index will still be nextOffset + values.set(index, value); + } + } + } + + // Comparable Element for sorting Elements by index + private static final class OrderedElement implements Comparable<OrderedElement> { + private final int index; + private final double value; + + OrderedElement(int index, double value) { + this.index = index; + this.value = value; + } + + @Override + public int compareTo(OrderedElement that) { + // both indexes are positive, and neither can be Integer.MAX_VALUE (otherwise there would be + // an array somewhere with Integer.MAX_VALUE + 1 elements) + return this.index - that.index; + } + + @Override + public int hashCode() { + return index ^ Doubles.hashCode(value); + } + + @Override + public boolean equals(Object o) { + if (!(o instanceof OrderedElement)) { + return false; + } + OrderedElement other = (OrderedElement) o; + return index == other.index && value == other.value; + } + } +}
