http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/Arrays.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/Arrays.java b/core/src/main/java/org/apache/mahout/math/Arrays.java new file mode 100644 index 0000000..802ffb7 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/Arrays.java @@ -0,0 +1,662 @@ +/* +Copyright 1999 CERN - European Organization for Nuclear Research. +Permission to use, copy, modify, distribute and sell this software and its documentation for any purpose +is hereby granted without fee, provided that the above copyright notice appear in all copies and +that both that copyright notice and this permission notice appear in supporting documentation. +CERN makes no representations about the suitability of this software for any purpose. +It is provided "as is" without expressed or implied warranty. +*/ +package org.apache.mahout.math; + +/** + * Array manipulations; complements <tt>java.util.Arrays</tt>. + * + * @see java.util.Arrays + * @see org.apache.mahout.math.Sorting + * + */ +public final class Arrays { + + private Arrays() { + } + + /** + * Ensures that a given array can hold up to <tt>minCapacity</tt> elements. + * + * Returns the identical array if it can hold at least the number of elements specified. Otherwise, returns a new + * array with increased capacity containing the same elements, ensuring that it can hold at least the number of + * elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity. + */ + public static byte[] ensureCapacity(byte[] array, int minCapacity) { + int oldCapacity = array.length; + byte[] newArray; + if (minCapacity > oldCapacity) { + int newCapacity = (oldCapacity * 3) / 2 + 1; + if (newCapacity < minCapacity) { + newCapacity = minCapacity; + } + + newArray = new byte[newCapacity]; + System.arraycopy(array, 0, newArray, 0, oldCapacity); + } else { + newArray = array; + } + return newArray; + } + + /** + * Ensures that a given array can hold up to <tt>minCapacity</tt> elements. + * + * Returns the identical array if it can hold at least the number of elements specified. Otherwise, returns a new + * array with increased capacity containing the same elements, ensuring that it can hold at least the number of + * elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity. + */ + public static char[] ensureCapacity(char[] array, int minCapacity) { + int oldCapacity = array.length; + char[] newArray; + if (minCapacity > oldCapacity) { + int newCapacity = (oldCapacity * 3) / 2 + 1; + if (newCapacity < minCapacity) { + newCapacity = minCapacity; + } + + newArray = new char[newCapacity]; + System.arraycopy(array, 0, newArray, 0, oldCapacity); + } else { + newArray = array; + } + return newArray; + } + + /** + * Ensures that a given array can hold up to <tt>minCapacity</tt> elements. + * + * Returns the identical array if it can hold at least the number of elements specified. Otherwise, returns a new + * array with increased capacity containing the same elements, ensuring that it can hold at least the number of + * elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity. + */ + public static double[] ensureCapacity(double[] array, int minCapacity) { + int oldCapacity = array.length; + double[] newArray; + if (minCapacity > oldCapacity) { + int newCapacity = (oldCapacity * 3) / 2 + 1; + if (newCapacity < minCapacity) { + newCapacity = minCapacity; + } + + newArray = new double[newCapacity]; + //for (int i = oldCapacity; --i >= 0; ) newArray[i] = array[i]; + System.arraycopy(array, 0, newArray, 0, oldCapacity); + } else { + newArray = array; + } + return newArray; + } + + /** + * Ensures that a given array can hold up to <tt>minCapacity</tt> elements. + * + * Returns the identical array if it can hold at least the number of elements specified. Otherwise, returns a new + * array with increased capacity containing the same elements, ensuring that it can hold at least the number of + * elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity. + */ + public static float[] ensureCapacity(float[] array, int minCapacity) { + int oldCapacity = array.length; + float[] newArray; + if (minCapacity > oldCapacity) { + int newCapacity = (oldCapacity * 3) / 2 + 1; + if (newCapacity < minCapacity) { + newCapacity = minCapacity; + } + + newArray = new float[newCapacity]; + System.arraycopy(array, 0, newArray, 0, oldCapacity); + } else { + newArray = array; + } + return newArray; + } + + /** + * Ensures that a given array can hold up to <tt>minCapacity</tt> elements. + * + * Returns the identical array if it can hold at least the number of elements specified. Otherwise, returns a new + * array with increased capacity containing the same elements, ensuring that it can hold at least the number of + * elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity. + */ + public static int[] ensureCapacity(int[] array, int minCapacity) { + int oldCapacity = array.length; + int[] newArray; + if (minCapacity > oldCapacity) { + int newCapacity = (oldCapacity * 3) / 2 + 1; + if (newCapacity < minCapacity) { + newCapacity = minCapacity; + } + + newArray = new int[newCapacity]; + System.arraycopy(array, 0, newArray, 0, oldCapacity); + } else { + newArray = array; + } + return newArray; + } + + /** + * Ensures that a given array can hold up to <tt>minCapacity</tt> elements. + * + * Returns the identical array if it can hold at least the number of elements specified. Otherwise, returns a new + * array with increased capacity containing the same elements, ensuring that it can hold at least the number of + * elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity. + */ + public static long[] ensureCapacity(long[] array, int minCapacity) { + int oldCapacity = array.length; + long[] newArray; + if (minCapacity > oldCapacity) { + int newCapacity = (oldCapacity * 3) / 2 + 1; + if (newCapacity < minCapacity) { + newCapacity = minCapacity; + } + + newArray = new long[newCapacity]; + System.arraycopy(array, 0, newArray, 0, oldCapacity); + } else { + newArray = array; + } + return newArray; + } + + /** + * Ensures that a given array can hold up to <tt>minCapacity</tt> elements. + * + * Returns the identical array if it can hold at least the number of elements specified. Otherwise, returns a new + * array with increased capacity containing the same elements, ensuring that it can hold at least the number of + * elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity. + */ + public static Object[] ensureCapacity(Object[] array, int minCapacity) { + int oldCapacity = array.length; + Object[] newArray; + if (minCapacity > oldCapacity) { + int newCapacity = (oldCapacity * 3) / 2 + 1; + if (newCapacity < minCapacity) { + newCapacity = minCapacity; + } + + newArray = new Object[newCapacity]; + System.arraycopy(array, 0, newArray, 0, oldCapacity); + } else { + newArray = array; + } + return newArray; + } + + /** + * Ensures that a given array can hold up to <tt>minCapacity</tt> elements. + * + * Returns the identical array if it can hold at least the number of elements specified. Otherwise, returns a new + * array with increased capacity containing the same elements, ensuring that it can hold at least the number of + * elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity. + */ + public static short[] ensureCapacity(short[] array, int minCapacity) { + int oldCapacity = array.length; + short[] newArray; + if (minCapacity > oldCapacity) { + int newCapacity = (oldCapacity * 3) / 2 + 1; + if (newCapacity < minCapacity) { + newCapacity = minCapacity; + } + + newArray = new short[newCapacity]; + System.arraycopy(array, 0, newArray, 0, oldCapacity); + } else { + newArray = array; + } + return newArray; + } + + /** + * Ensures that a given array can hold up to <tt>minCapacity</tt> elements. + * + * Returns the identical array if it can hold at least the number of elements specified. Otherwise, returns a new + * array with increased capacity containing the same elements, ensuring that it can hold at least the number of + * elements specified by the minimum capacity argument. + * + * @param minCapacity the desired minimum capacity. + */ + public static boolean[] ensureCapacity(boolean[] array, int minCapacity) { + int oldCapacity = array.length; + boolean[] newArray; + if (minCapacity > oldCapacity) { + int newCapacity = (oldCapacity * 3) / 2 + 1; + if (newCapacity < minCapacity) { + newCapacity = minCapacity; + } + + newArray = new boolean[newCapacity]; + System.arraycopy(array, 0, newArray, 0, oldCapacity); + } else { + newArray = array; + } + return newArray; + } + + /** + * Returns a string representation of the specified array. The string representation consists of a list of the + * arrays's elements, enclosed in square brackets (<tt>"[]"</tt>). Adjacent elements are separated by the characters + * <tt>", "</tt> (comma and space). + * + * @return a string representation of the specified array. + */ + public static String toString(byte[] array) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = array.length - 1; + for (int i = 0; i <= maxIndex; i++) { + buf.append(array[i]); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } + + /** + * Returns a string representation of the specified array. The string representation consists of a list of the + * arrays's elements, enclosed in square brackets (<tt>"[]"</tt>). Adjacent elements are separated by the characters + * <tt>", "</tt> (comma and space). + * + * @return a string representation of the specified array. + */ + public static String toString(char[] array) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = array.length - 1; + for (int i = 0; i <= maxIndex; i++) { + buf.append(array[i]); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } + + /** + * Returns a string representation of the specified array. The string representation consists of a list of the + * arrays's elements, enclosed in square brackets (<tt>"[]"</tt>). Adjacent elements are separated by the characters + * <tt>", "</tt> (comma and space). + * + * @return a string representation of the specified array. + */ + public static String toString(double[] array) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = array.length - 1; + for (int i = 0; i <= maxIndex; i++) { + buf.append(array[i]); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } + + /** + * Returns a string representation of the specified array. The string representation consists of a list of the + * arrays's elements, enclosed in square brackets (<tt>"[]"</tt>). Adjacent elements are separated by the characters + * <tt>", "</tt> (comma and space). + * + * @return a string representation of the specified array. + */ + public static String toString(float[] array) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = array.length - 1; + for (int i = 0; i <= maxIndex; i++) { + buf.append(array[i]); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } + + /** + * Returns a string representation of the specified array. The string representation consists of a list of the + * arrays's elements, enclosed in square brackets (<tt>"[]"</tt>). Adjacent elements are separated by the characters + * <tt>", "</tt> (comma and space). + * + * @return a string representation of the specified array. + */ + public static String toString(int[] array) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = array.length - 1; + for (int i = 0; i <= maxIndex; i++) { + buf.append(array[i]); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } + + /** + * Returns a string representation of the specified array. The string representation consists of a list of the + * arrays's elements, enclosed in square brackets (<tt>"[]"</tt>). Adjacent elements are separated by the characters + * <tt>", "</tt> (comma and space). + * + * @return a string representation of the specified array. + */ + public static String toString(long[] array) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = array.length - 1; + for (int i = 0; i <= maxIndex; i++) { + buf.append(array[i]); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } + + /** + * Returns a string representation of the specified array. The string representation consists of a list of the + * arrays's elements, enclosed in square brackets (<tt>"[]"</tt>). Adjacent elements are separated by the characters + * <tt>", "</tt> (comma and space). + * + * @return a string representation of the specified array. + */ + public static String toString(Object[] array) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = array.length - 1; + for (int i = 0; i <= maxIndex; i++) { + buf.append(array[i]); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } + + /** + * Returns a string representation of the specified array. The string representation consists of a list of the + * arrays's elements, enclosed in square brackets (<tt>"[]"</tt>). Adjacent elements are separated by the characters + * <tt>", "</tt> (comma and space). + * + * @return a string representation of the specified array. + */ + public static String toString(short[] array) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = array.length - 1; + for (int i = 0; i <= maxIndex; i++) { + buf.append(array[i]); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } + + /** + * Returns a string representation of the specified array. The string representation consists of a list of the + * arrays's elements, enclosed in square brackets (<tt>"[]"</tt>). Adjacent elements are separated by the characters + * <tt>", "</tt> (comma and space). + * + * @return a string representation of the specified array. + */ + public static String toString(boolean[] array) { + StringBuilder buf = new StringBuilder(); + buf.append('['); + int maxIndex = array.length - 1; + for (int i = 0; i <= maxIndex; i++) { + buf.append(array[i]); + if (i < maxIndex) { + buf.append(", "); + } + } + buf.append(']'); + return buf.toString(); + } + + /** + * Ensures that the specified array cannot hold more than <tt>maxCapacity</tt> elements. An application can use this + * operation to minimize array storage. <p> Returns the identical array if <tt>array.length <= maxCapacity</tt>. + * Otherwise, returns a new array with a length of <tt>maxCapacity</tt> containing the first <tt>maxCapacity</tt> + * elements of <tt>array</tt>. + * + * @param maxCapacity the desired maximum capacity. + */ + public static byte[] trimToCapacity(byte[] array, int maxCapacity) { + if (array.length > maxCapacity) { + byte[] oldArray = array; + array = new byte[maxCapacity]; + System.arraycopy(oldArray, 0, array, 0, maxCapacity); + } + return array; + } + + /** + * Ensures that the specified array cannot hold more than <tt>maxCapacity</tt> elements. An application can use this + * operation to minimize array storage. <p> Returns the identical array if <tt>array.length <= maxCapacity</tt>. + * Otherwise, returns a new array with a length of <tt>maxCapacity</tt> containing the first <tt>maxCapacity</tt> + * elements of <tt>array</tt>. + * + * @param maxCapacity the desired maximum capacity. + */ + public static char[] trimToCapacity(char[] array, int maxCapacity) { + if (array.length > maxCapacity) { + char[] oldArray = array; + array = new char[maxCapacity]; + System.arraycopy(oldArray, 0, array, 0, maxCapacity); + } + return array; + } + + /** + * Ensures that the specified array cannot hold more than <tt>maxCapacity</tt> elements. An application can use this + * operation to minimize array storage. <p> Returns the identical array if <tt>array.length <= maxCapacity</tt>. + * Otherwise, returns a new array with a length of <tt>maxCapacity</tt> containing the first <tt>maxCapacity</tt> + * elements of <tt>array</tt>. + * + * @param maxCapacity the desired maximum capacity. + */ + public static double[] trimToCapacity(double[] array, int maxCapacity) { + if (array.length > maxCapacity) { + double[] oldArray = array; + array = new double[maxCapacity]; + System.arraycopy(oldArray, 0, array, 0, maxCapacity); + } + return array; + } + + /** + * Ensures that the specified array cannot hold more than <tt>maxCapacity</tt> elements. An application can use this + * operation to minimize array storage. <p> Returns the identical array if <tt>array.length <= maxCapacity</tt>. + * Otherwise, returns a new array with a length of <tt>maxCapacity</tt> containing the first <tt>maxCapacity</tt> + * elements of <tt>array</tt>. + * + * @param maxCapacity the desired maximum capacity. + */ + public static float[] trimToCapacity(float[] array, int maxCapacity) { + if (array.length > maxCapacity) { + float[] oldArray = array; + array = new float[maxCapacity]; + System.arraycopy(oldArray, 0, array, 0, maxCapacity); + } + return array; + } + + /** + * Ensures that the specified array cannot hold more than <tt>maxCapacity</tt> elements. An application can use this + * operation to minimize array storage. <p> Returns the identical array if <tt>array.length <= maxCapacity</tt>. + * Otherwise, returns a new array with a length of <tt>maxCapacity</tt> containing the first <tt>maxCapacity</tt> + * elements of <tt>array</tt>. + * + * @param maxCapacity the desired maximum capacity. + */ + public static int[] trimToCapacity(int[] array, int maxCapacity) { + if (array.length > maxCapacity) { + int[] oldArray = array; + array = new int[maxCapacity]; + System.arraycopy(oldArray, 0, array, 0, maxCapacity); + } + return array; + } + + /** + * Ensures that the specified array cannot hold more than <tt>maxCapacity</tt> elements. An application can use this + * operation to minimize array storage. <p> Returns the identical array if <tt>array.length <= maxCapacity</tt>. + * Otherwise, returns a new array with a length of <tt>maxCapacity</tt> containing the first <tt>maxCapacity</tt> + * elements of <tt>array</tt>. + * + * @param maxCapacity the desired maximum capacity. + */ + public static long[] trimToCapacity(long[] array, int maxCapacity) { + if (array.length > maxCapacity) { + long[] oldArray = array; + array = new long[maxCapacity]; + System.arraycopy(oldArray, 0, array, 0, maxCapacity); + } + return array; + } + + /** + * Ensures that the specified array cannot hold more than <tt>maxCapacity</tt> elements. An application can use this + * operation to minimize array storage. <p> Returns the identical array if <tt>array.length <= maxCapacity</tt>. + * Otherwise, returns a new array with a length of <tt>maxCapacity</tt> containing the first <tt>maxCapacity</tt> + * elements of <tt>array</tt>. + * + * @param maxCapacity the desired maximum capacity. + */ + public static Object[] trimToCapacity(Object[] array, int maxCapacity) { + if (array.length > maxCapacity) { + Object[] oldArray = array; + array = new Object[maxCapacity]; + System.arraycopy(oldArray, 0, array, 0, maxCapacity); + } + return array; + } + + /** + * Ensures that the specified array cannot hold more than <tt>maxCapacity</tt> elements. An application can use this + * operation to minimize array storage. <p> Returns the identical array if <tt>array.length <= maxCapacity</tt>. + * Otherwise, returns a new array with a length of <tt>maxCapacity</tt> containing the first <tt>maxCapacity</tt> + * elements of <tt>array</tt>. + * + * @param maxCapacity the desired maximum capacity. + */ + public static short[] trimToCapacity(short[] array, int maxCapacity) { + if (array.length > maxCapacity) { + short[] oldArray = array; + array = new short[maxCapacity]; + System.arraycopy(oldArray, 0, array, 0, maxCapacity); + } + return array; + } + + /** + * Ensures that the specified array cannot hold more than <tt>maxCapacity</tt> elements. An application can use this + * operation to minimize array storage. <p> Returns the identical array if <tt>array.length <= maxCapacity</tt>. + * Otherwise, returns a new array with a length of <tt>maxCapacity</tt> containing the first <tt>maxCapacity</tt> + * elements of <tt>array</tt>. + * + * @param maxCapacity the desired maximum capacity. + */ + public static boolean[] trimToCapacity(boolean[] array, int maxCapacity) { + if (array.length > maxCapacity) { + boolean[] oldArray = array; + array = new boolean[maxCapacity]; + System.arraycopy(oldArray, 0, array, 0, maxCapacity); + } + return array; + } + + /** + * {@link java.util.Arrays#copyOf} compatibility with Java 1.5. + */ + public static byte[] copyOf(byte[] src, int length) { + byte[] result = new byte [length]; + System.arraycopy(src, 0, result, 0, Math.min(length, src.length)); + return result; + } + + /** + * {@link java.util.Arrays#copyOf} compatibility with Java 1.5. + */ + public static char[] copyOf(char[] src, int length) { + char[] result = new char [length]; + System.arraycopy(src, 0, result, 0, Math.min(length, src.length)); + return result; + } + + /** + * {@link java.util.Arrays#copyOf} compatibility with Java 1.5. + */ + public static short[] copyOf(short[] src, int length) { + short[] result = new short [length]; + System.arraycopy(src, 0, result, 0, Math.min(length, src.length)); + return result; + } + + /** + * {@link java.util.Arrays#copyOf} compatibility with Java 1.5. + */ + public static int[] copyOf(int[] src, int length) { + int[] result = new int [length]; + System.arraycopy(src, 0, result, 0, Math.min(length, src.length)); + return result; + } + + /** + * {@link java.util.Arrays#copyOf} compatibility with Java 1.5. + */ + public static float[] copyOf(float[] src, int length) { + float[] result = new float [length]; + System.arraycopy(src, 0, result, 0, Math.min(length, src.length)); + return result; + } + + /** + * {@link java.util.Arrays#copyOf} compatibility with Java 1.5. + */ + public static double[] copyOf(double[] src, int length) { + double[] result = new double [length]; + System.arraycopy(src, 0, result, 0, Math.min(length, src.length)); + return result; + } + + /** + * {@link java.util.Arrays#copyOf} compatibility with Java 1.5. + */ + public static long[] copyOf(long[] src, int length) { + long[] result = new long [length]; + System.arraycopy(src, 0, result, 0, Math.min(length, src.length)); + return result; + } +}
http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/BinarySearch.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/BinarySearch.java b/core/src/main/java/org/apache/mahout/math/BinarySearch.java new file mode 100644 index 0000000..ddb04a7 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/BinarySearch.java @@ -0,0 +1,403 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.util.Comparator; + +public final class BinarySearch { + + private BinarySearch() {} + + /** + * Performs a binary search for the specified element in the specified + * ascending sorted array. Searching in an unsorted array has an undefined + * result. It's also undefined which element is found if there are multiple + * occurrences of the same element. + * + * @param array + * the sorted {@code byte} array to search. + * @param value + * the {@code byte} element to find. + * @param from + * the first index to sort, inclusive. + * @param to + * the last index to sort, inclusive. + * @return the non-negative index of the element, or a negative index which is + * {@code -index - 1} where the element would be inserted. + */ + public static int binarySearchFromTo(byte[] array, byte value, int from, int to) { + int mid = -1; + while (from <= to) { + mid = (from + to) >>> 1; + if (value > array[mid]) { + from = mid + 1; + } else if (value == array[mid]) { + return mid; + } else { + to = mid - 1; + } + } + if (mid < 0) { + return -1; + } + + return -mid - (value < array[mid] ? 1 : 2); + } + + /** + * Performs a binary search for the specified element in the specified + * ascending sorted array. Searching in an unsorted array has an undefined + * result. It's also undefined which element is found if there are multiple + * occurrences of the same element. + * + * @param array + * the sorted {@code char} array to search. + * @param value + * the {@code char} element to find. + * @param from + * the first index to sort, inclusive. + * @param to + * the last index to sort, inclusive. + * @return the non-negative index of the element, or a negative index which is + * {@code -index - 1} where the element would be inserted. + */ + public static int binarySearchFromTo(char[] array, char value, int from, int to) { + int mid = -1; + while (from <= to) { + mid = (from + to) >>> 1; + if (value > array[mid]) { + from = mid + 1; + } else if (value == array[mid]) { + return mid; + } else { + to = mid - 1; + } + } + if (mid < 0) { + return -1; + } + return -mid - (value < array[mid] ? 1 : 2); + } + + /** + * Performs a binary search for the specified element in the specified + * ascending sorted array. Searching in an unsorted array has an undefined + * result. It's also undefined which element is found if there are multiple + * occurrences of the same element. + * + * @param array + * the sorted {@code double} array to search. + * @param value + * the {@code double} element to find. + * @param from + * the first index to sort, inclusive. + * @param to + * the last index to sort, inclusive. + * @return the non-negative index of the element, or a negative index which is + * {@code -index - 1} where the element would be inserted. + */ + public static int binarySearchFromTo(double[] array, double value, int from, int to) { + long longBits = Double.doubleToLongBits(value); + int mid = -1; + while (from <= to) { + mid = (from + to) >>> 1; + if (lessThan(array[mid], value)) { + from = mid + 1; + } else if (longBits == Double.doubleToLongBits(array[mid])) { + return mid; + } else { + to = mid - 1; + } + } + if (mid < 0) { + return -1; + } + return -mid - (lessThan(value, array[mid]) ? 1 : 2); + } + + /** + * Performs a binary search for the specified element in the specified + * ascending sorted array. Searching in an unsorted array has an undefined + * result. It's also undefined which element is found if there are multiple + * occurrences of the same element. + * + * @param array + * the sorted {@code float} array to search. + * @param value + * the {@code float} element to find. + * @param from + * the first index to sort, inclusive. + * @param to + * the last index to sort, inclusive. + * @return the non-negative index of the element, or a negative index which is + * {@code -index - 1} where the element would be inserted. + */ + public static int binarySearchFromTo(float[] array, float value, int from, int to) { + int intBits = Float.floatToIntBits(value); + int mid = -1; + while (from <= to) { + mid = (from + to) >>> 1; + if (lessThan(array[mid], value)) { + from = mid + 1; + } else if (intBits == Float.floatToIntBits(array[mid])) { + return mid; + } else { + to = mid - 1; + } + } + if (mid < 0) { + return -1; + } + return -mid - (lessThan(value, array[mid]) ? 1 : 2); + } + + /** + * Performs a binary search for the specified element in the specified + * ascending sorted array. Searching in an unsorted array has an undefined + * result. It's also undefined which element is found if there are multiple + * occurrences of the same element. + * + * @param array + * the sorted {@code int} array to search. + * @param value + * the {@code int} element to find. + * @param from + * the first index to sort, inclusive. + * @param to + * the last index to sort, inclusive. + * @return the non-negative index of the element, or a negative index which is + * {@code -index - 1} where the element would be inserted. + */ + public static int binarySearchFromTo(int[] array, int value, int from, int to) { + int mid = -1; + while (from <= to) { + mid = (from + to) >>> 1; + if (value > array[mid]) { + from = mid + 1; + } else if (value == array[mid]) { + return mid; + } else { + to = mid - 1; + } + } + if (mid < 0) { + return -1; + } + return -mid - (value < array[mid] ? 1 : 2); + } + + /** + * Performs a binary search for the specified element in the specified + * ascending sorted array. Searching in an unsorted array has an undefined + * result. It's also undefined which element is found if there are multiple + * occurrences of the same element. + * + * @param array + * the sorted {@code long} array to search. + * @param value + * the {@code long} element to find. + * @param from + * the first index to sort, inclusive. + * @param to + * the last index to sort, inclusive. + * @return the non-negative index of the element, or a negative index which is + * {@code -index - 1} where the element would be inserted. + */ + public static int binarySearchFromTo(long[] array, long value, int from, int to) { + int mid = -1; + while (from <= to) { + mid = (from + to) >>> 1; + if (value > array[mid]) { + from = mid + 1; + } else if (value == array[mid]) { + return mid; + } else { + to = mid - 1; + } + } + if (mid < 0) { + return -1; + } + return -mid - (value < array[mid] ? 1 : 2); + } + + /** + * Performs a binary search for the specified element in the specified + * ascending sorted array. Searching in an unsorted array has an undefined + * result. It's also undefined which element is found if there are multiple + * occurrences of the same element. + * + * @param array + * the sorted {@code Object} array to search. + * @param object + * the {@code Object} element to find + * @param from + * the first index to sort, inclusive. + * @param to + * the last index to sort, inclusive. + * @return the non-negative index of the element, or a negative index which is + * {@code -index - 1} where the element would be inserted. + * + */ + public static <T extends Comparable<T>> int binarySearchFromTo(T[] array, T object, int from, int to) { + if (array.length == 0) { + return -1; + } + + int mid = 0; + int result = 0; + while (from <= to) { + mid = (from + to) >>> 1; + if ((result = array[mid].compareTo(object)) < 0) { + from = mid + 1; + } else if (result == 0) { + return mid; + } else { + to = mid - 1; + } + } + return -mid - (result >= 0 ? 1 : 2); + } + + /** + * Performs a binary search for the specified element in the specified + * ascending sorted array using the {@code Comparator} to compare elements. + * Searching in an unsorted array has an undefined result. It's also undefined + * which element is found if there are multiple occurrences of the same + * element. + * + * @param array + * the sorted array to search + * @param object + * the element to find + * @param from + * the first index to sort, inclusive. + * @param to + * the last index to sort, inclusive. + * @param comparator + * the {@code Comparator} used to compare the elements. + * @return the non-negative index of the element, or a negative index which + */ + public static <T> int binarySearchFromTo(T[] array, T object, int from, int to, Comparator<? super T> comparator) { + int mid = 0; + int result = 0; + while (from <= to) { + mid = (from + to) >>> 1; + if ((result = comparator.compare(array[mid], object)) < 0) { + from = mid + 1; + } else if (result == 0) { + return mid; + } else { + to = mid - 1; + } + } + return -mid - (result >= 0 ? 1 : 2); + } + + /** + * Performs a binary search for the specified element in the specified + * ascending sorted array. Searching in an unsorted array has an undefined + * result. It's also undefined which element is found if there are multiple + * occurrences of the same element. + * + * @param array + * the sorted {@code short} array to search. + * @param value + * the {@code short} element to find. + * @param from + * the first index to sort, inclusive. + * @param to + * the last index to sort, inclusive. + * @return the non-negative index of the element, or a negative index which is + * {@code -index - 1} where the element would be inserted. + */ + public static int binarySearchFromTo(short[] array, short value, int from, int to) { + int mid = -1; + while (from <= to) { + mid = (from + to) >>> 1; + if (value > array[mid]) { + from = mid + 1; + } else if (value == array[mid]) { + return mid; + } else { + to = mid - 1; + } + } + if (mid < 0) { + return -1; + } + return -mid - (value < array[mid] ? 1 : 2); + } + + private static boolean lessThan(double double1, double double2) { + // A slightly specialized version of + // Double.compare(double1, double2) < 0. + + // Non-zero and non-NaN checking. + if (double1 < double2) { + return true; + } + if (double1 > double2) { + return false; + } + if (double1 == double2 && double1 != 0.0) { + return false; + } + + // NaNs are equal to other NaNs and larger than any other double. + if (Double.isNaN(double1)) { + return false; + } + if (Double.isNaN(double2)) { + return true; + } + + // Deal with +0.0 and -0.0. + long d1 = Double.doubleToRawLongBits(double1); + long d2 = Double.doubleToRawLongBits(double2); + return d1 < d2; + } + + private static boolean lessThan(float float1, float float2) { + // A slightly specialized version of Float.compare(float1, float2) < 0. + + // Non-zero and non-NaN checking. + if (float1 < float2) { + return true; + } + if (float1 > float2) { + return false; + } + if (float1 == float2 && float1 != 0.0f) { + return false; + } + + // NaNs are equal to other NaNs and larger than any other float + if (Float.isNaN(float1)) { + return false; + } + if (Float.isNaN(float2)) { + return true; + } + + // Deal with +0.0 and -0.0 + int f1 = Float.floatToRawIntBits(float1); + int f2 = Float.floatToRawIntBits(float2); + return f1 < f2; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/CardinalityException.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/CardinalityException.java b/core/src/main/java/org/apache/mahout/math/CardinalityException.java new file mode 100644 index 0000000..04e7602 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/CardinalityException.java @@ -0,0 +1,30 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +/** + * Exception thrown when there is a cardinality mismatch in matrix or vector operations. + * For example, vectors of differing cardinality cannot be added. + */ +public class CardinalityException extends IllegalArgumentException { + + public CardinalityException(int expected, int cardinality) { + super("Required cardinality " + expected + " but got " + cardinality); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/Centroid.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/Centroid.java b/core/src/main/java/org/apache/mahout/math/Centroid.java new file mode 100644 index 0000000..dceffe1 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/Centroid.java @@ -0,0 +1,89 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import org.apache.mahout.math.function.Functions; + +/** + * A centroid is a weighted vector. We have it delegate to the vector itself for lots of operations + * to make it easy to use vector search classes and such. + */ +public class Centroid extends WeightedVector { + public Centroid(WeightedVector original) { + super(original.getVector().like().assign(original), original.getWeight(), original.getIndex()); + } + + public Centroid(int key, Vector initialValue) { + super(initialValue, 1, key); + } + + public Centroid(int key, Vector initialValue, double weight) { + super(initialValue, weight, key); + } + + public static Centroid create(int key, Vector initialValue) { + if (initialValue instanceof WeightedVector) { + return new Centroid(key, new DenseVector(initialValue), ((WeightedVector) initialValue).getWeight()); + } else { + return new Centroid(key, new DenseVector(initialValue), 1); + } + } + + public void update(Vector v) { + if (v instanceof Centroid) { + Centroid c = (Centroid) v; + update(c.delegate, c.getWeight()); + } else { + update(v, 1); + } + } + + public void update(Vector other, final double wy) { + final double wx = getWeight(); + delegate.assign(other, Functions.reweigh(wx, wy)); + setWeight(wx + wy); + } + + @Override + public Centroid like() { + return new Centroid(getIndex(), getVector().like(), getWeight()); + } + + /** + * Gets the index of this centroid. Use getIndex instead to maintain standard names. + */ + @Deprecated + public int getKey() { + return getIndex(); + } + + public void addWeight(double newWeight) { + setWeight(getWeight() + newWeight); + } + + @Override + public String toString() { + return String.format("key = %d, weight = %.2f, vector = %s", getIndex(), getWeight(), delegate); + } + + @SuppressWarnings("CloneDoesntCallSuperClone") + @Override + public Centroid clone() { + return new Centroid(this); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/CholeskyDecomposition.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/CholeskyDecomposition.java b/core/src/main/java/org/apache/mahout/math/CholeskyDecomposition.java new file mode 100644 index 0000000..5cea8e5 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/CholeskyDecomposition.java @@ -0,0 +1,227 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import com.google.common.base.Preconditions; +import org.apache.mahout.math.function.Functions; + +/** + * Cholesky decomposition shamelessly ported from JAMA. + * <p> + * A Cholesky decomposition of a semi-positive definite matrix A is a lower triangular matrix L such + * that L L^* = A. If A is full rank, L is unique. If A is real, then it must be symmetric and R + * will also be real. + */ +public class CholeskyDecomposition { + private final PivotedMatrix L; + private boolean isPositiveDefinite = true; + + public CholeskyDecomposition(Matrix a) { + this(a, true); + } + + public CholeskyDecomposition(Matrix a, boolean pivot) { + int rows = a.rowSize(); + L = new PivotedMatrix(new DenseMatrix(rows, rows)); + + // must be square + Preconditions.checkArgument(rows == a.columnSize(), "Must be a Square Matrix"); + + if (pivot) { + decomposeWithPivoting(a); + } else { + decompose(a); + } + } + + private void decomposeWithPivoting(Matrix a) { + int n = a.rowSize(); + L.assign(a); + + // pivoted column-wise submatrix cholesky with simple pivoting + double uberMax = L.viewDiagonal().aggregate(Functions.MAX, Functions.ABS); + for (int k = 0; k < n; k++) { + double max = 0; + int pivot = k; + for (int j = k; j < n; j++) { + if (L.get(j, j) > max) { + max = L.get(j, j); + pivot = j; + if (uberMax < Math.abs(max)) { + uberMax = Math.abs(max); + } + } + } + L.swap(k, pivot); + + double akk = L.get(k, k); + double epsilon = 1.0e-10 * Math.max(uberMax, L.viewColumn(k).aggregate(Functions.MAX, Functions.ABS)); + + if (akk < -epsilon) { + // can't have decidedly negative element on diagonal + throw new IllegalArgumentException("Matrix is not positive semi-definite"); + } else if (akk <= epsilon) { + // degenerate column case. Set all to zero + L.viewColumn(k).assign(0); + isPositiveDefinite = false; + + // no need to subtract from remaining sub-matrix + } else { + // normalize column by diagonal element + akk = Math.sqrt(Math.max(0, akk)); + L.viewColumn(k).viewPart(k, n - k).assign(Functions.div(akk)); + L.viewColumn(k).viewPart(0, k).assign(0); + + // subtract off scaled version of this column to the right + for (int j = k + 1; j < n; j++) { + Vector columnJ = L.viewColumn(j).viewPart(k, n - k); + Vector columnK = L.viewColumn(k).viewPart(k, n - k); + columnJ.assign(columnK, Functions.minusMult(columnK.get(j - k))); + } + + } + } + } + + private void decompose(Matrix a) { + int n = a.rowSize(); + L.assign(a); + + // column-wise submatrix cholesky with simple pivoting + for (int k = 0; k < n; k++) { + + double akk = L.get(k, k); + + // set upper part of column to 0. + L.viewColumn(k).viewPart(0, k).assign(0); + + double epsilon = 1.0e-10 * L.viewColumn(k).aggregate(Functions.MAX, Functions.ABS); + if (akk <= epsilon) { + // degenerate column case. Set diagonal to 1, all others to zero + L.viewColumn(k).viewPart(k, n - k).assign(0); + + isPositiveDefinite = false; + + // no need to subtract from remaining sub-matrix + } else { + // normalize column by diagonal element + akk = Math.sqrt(Math.max(0, akk)); + L.set(k, k, akk); + L.viewColumn(k).viewPart(k + 1, n - k - 1).assign(Functions.div(akk)); + + // now subtract scaled version of column + for (int j = k + 1; j < n; j++) { + Vector columnJ = L.viewColumn(j).viewPart(j, n - j); + Vector columnK = L.viewColumn(k).viewPart(j, n - j); + columnJ.assign(columnK, Functions.minusMult(L.get(j, k))); + } + } + } + } + + public boolean isPositiveDefinite() { + return isPositiveDefinite; + } + + public Matrix getL() { + return L.getBase(); + } + + public PivotedMatrix getPermutedL() { + return L; + } + + /** + * @return Returns the permutation of rows and columns that was applied to L + */ + public int[] getPivot() { + return L.getRowPivot(); + } + + public int[] getInversePivot() { + return L.getInverseRowPivot(); + } + + /** + * Compute inv(L) * z efficiently. + * + * @param z + */ + public Matrix solveLeft(Matrix z) { + int n = L.columnSize(); + int nx = z.columnSize(); + + Matrix X = new DenseMatrix(n, z.columnSize()); + X.assign(z); + + // Solve L*Y = Z using back-substitution + // note that k and i have to go in a funny order because L is pivoted + for (int internalK = 0; internalK < n; internalK++) { + int k = L.rowUnpivot(internalK); + for (int j = 0; j < nx; j++) { + for (int internalI = 0; internalI < internalK; internalI++) { + int i = L.rowUnpivot(internalI); + X.set(k, j, X.get(k, j) - X.get(i, j) * L.get(k, i)); + } + if (L.get(k, k) != 0) { + X.set(k, j, X.get(k, j) / L.get(k, k)); + } else { + X.set(k, j, 0); + } + } + } + return X; + } + + /** + * Compute z * inv(L') efficiently + */ + public Matrix solveRight(Matrix z) { + int n = z.columnSize(); + int nx = z.rowSize(); + + Matrix x = new DenseMatrix(z.rowSize(), z.columnSize()); + x.assign(z); + + // Solve Y*L' = Z using back-substitution + for (int internalK = 0; internalK < n; internalK++) { + int k = L.rowUnpivot(internalK); + for (int j = 0; j < nx; j++) { + for (int internalI = 0; internalI < k; internalI++) { + int i = L.rowUnpivot(internalI); + x.set(j, k, x.get(j, k) - x.get(j, i) * L.get(k, i)); + if (Double.isInfinite(x.get(j, k)) || Double.isNaN(x.get(j, k))) { + throw new IllegalStateException( + String.format("Invalid value found at %d,%d (should not be possible)", j, k)); + } + } + if (L.get(k, k) != 0) { + x.set(j, k, x.get(j, k) / L.get(k, k)); + } else { + x.set(j, k, 0); + } + if (Double.isInfinite(x.get(j, k)) || Double.isNaN(x.get(j, k))) { + throw new IllegalStateException(String.format("Invalid value found at %d,%d (should not be possible)", j, k)); + } + } + } + return x; + } + +} + http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/ConstantVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/ConstantVector.java b/core/src/main/java/org/apache/mahout/math/ConstantVector.java new file mode 100644 index 0000000..f10f631 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/ConstantVector.java @@ -0,0 +1,177 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.util.Iterator; + +import com.google.common.collect.AbstractIterator; + +/** + * Implements a vector with all the same values. + */ +public class ConstantVector extends AbstractVector { + private final double value; + + public ConstantVector(double value, int size) { + super(size); + this.value = value; + } + + /** + * Subclasses must override to return an appropriately sparse or dense result + * + * @param rows the row cardinality + * @param columns the column cardinality + * @return a Matrix + */ + @Override + protected Matrix matrixLike(int rows, int columns) { + return new DenseMatrix(rows, columns); + } + + /** + * Used internally by assign() to update multiple indices and values at once. + * Only really useful for sparse vectors (especially SequentialAccessSparseVector). + * <p> + * If someone ever adds a new type of sparse vectors, this method must merge (index, value) pairs into the vector. + * + * @param updates a mapping of indices to values to merge in the vector. + */ + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + throw new UnsupportedOperationException("Cannot mutate a ConstantVector"); + } + + /** + * @return true iff this implementation should be considered dense -- that it explicitly represents + * every value + */ + @Override + public boolean isDense() { + return true; + } + + /** + * @return true iff this implementation should be considered to be iterable in index order in an + * efficient way. In particular this implies that {@link #iterator()} and {@link + * #iterateNonZero()} return elements in ascending order by index. + */ + @Override + public boolean isSequentialAccess() { + return true; + } + + /** + * Iterates over all elements <p> + * NOTE: Implementations may choose to reuse the Element returned + * for performance reasons, so if you need a copy of it, you should call {@link #getElement(int)} + * for the given index + * + * @return An {@link java.util.Iterator} over all elements + */ + @Override + public Iterator<Element> iterator() { + return new AbstractIterator<Element>() { + private int i = 0; + private final int n = size(); + @Override + protected Element computeNext() { + if (i < n) { + return new LocalElement(i++); + } else { + return endOfData(); + } + } + }; + } + + /** + * Iterates over all non-zero elements.<p> + * NOTE: Implementations may choose to reuse the Element + * returned for performance reasons, so if you need a copy of it, you should call {@link + * #getElement(int)} for the given index + * + * @return An {@link java.util.Iterator} over all non-zero elements + */ + @Override + public Iterator<Element> iterateNonZero() { + return iterator(); + } + + /** + * Return the value at the given index, without checking bounds + * + * @param index an int index + * @return the double at the index + */ + @Override + public double getQuick(int index) { + return value; + } + + /** + * Return an empty vector of the same underlying class as the receiver + * + * @return a Vector + */ + @Override + public Vector like() { + return new DenseVector(size()); + } + + @Override + public Vector like(int cardinality) { + return new DenseVector(cardinality); + } + + /** + * Set the value at the given index, without checking bounds + * + * @param index an int index into the receiver + * @param value a double value to set + */ + @Override + public void setQuick(int index, double value) { + throw new UnsupportedOperationException("Can't set a value in a constant matrix"); + } + + /** + * Return the number of values in the recipient + * + * @return an int + */ + @Override + public int getNumNondefaultElements() { + return size(); + } + + @Override + public double getLookupCost() { + return 1; + } + + @Override + public double getIteratorAdvanceCost() { + return 1; + } + + @Override + public boolean isAddConstantTime() { + throw new UnsupportedOperationException("Cannot mutate a ConstantVector"); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/DelegatingVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/DelegatingVector.java b/core/src/main/java/org/apache/mahout/math/DelegatingVector.java new file mode 100644 index 0000000..0b2e36b --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/DelegatingVector.java @@ -0,0 +1,336 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import org.apache.mahout.math.function.DoubleDoubleFunction; +import org.apache.mahout.math.function.DoubleFunction; + +/** + * A delegating vector provides an easy way to decorate vectors with weights or id's and such while + * keeping all of the Vector functionality. + * + * This vector implements LengthCachingVector because almost all delegates cache the length and + * the cost of false positives is very low. + */ +public class DelegatingVector implements Vector, LengthCachingVector { + protected Vector delegate; + + public DelegatingVector(Vector v) { + delegate = v; + } + + protected DelegatingVector() { + } + + public Vector getVector() { + return delegate; + } + + @Override + public double aggregate(DoubleDoubleFunction aggregator, DoubleFunction map) { + return delegate.aggregate(aggregator, map); + } + + @Override + public double aggregate(Vector other, DoubleDoubleFunction aggregator, DoubleDoubleFunction combiner) { + return delegate.aggregate(other, aggregator, combiner); + } + + @Override + public Vector viewPart(int offset, int length) { + return delegate.viewPart(offset, length); + } + + @SuppressWarnings("CloneDoesntDeclareCloneNotSupportedException") + @Override + public Vector clone() { + DelegatingVector r; + try { + r = (DelegatingVector) super.clone(); + } catch (CloneNotSupportedException e) { + throw new RuntimeException("Clone not supported for DelegatingVector, shouldn't be possible"); + } + // delegate points to original without this + r.delegate = delegate.clone(); + return r; + } + + @Override + public Iterable<Element> all() { + return delegate.all(); + } + + @Override + public Iterable<Element> nonZeroes() { + return delegate.nonZeroes(); + } + + @Override + public Vector divide(double x) { + return delegate.divide(x); + } + + @Override + public double dot(Vector x) { + return delegate.dot(x); + } + + @Override + public double get(int index) { + return delegate.get(index); + } + + @Override + public Element getElement(int index) { + return delegate.getElement(index); + } + + /** + * Merge a set of (index, value) pairs into the vector. + * + * @param updates an ordered mapping of indices to values to be merged in. + */ + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + delegate.mergeUpdates(updates); + } + + @Override + public Vector minus(Vector that) { + return delegate.minus(that); + } + + @Override + public Vector normalize() { + return delegate.normalize(); + } + + @Override + public Vector normalize(double power) { + return delegate.normalize(power); + } + + @Override + public Vector logNormalize() { + return delegate.logNormalize(); + } + + @Override + public Vector logNormalize(double power) { + return delegate.logNormalize(power); + } + + @Override + public double norm(double power) { + return delegate.norm(power); + } + + @Override + public double getLengthSquared() { + return delegate.getLengthSquared(); + } + + @Override + public void invalidateCachedLength() { + if (delegate instanceof LengthCachingVector) { + ((LengthCachingVector) delegate).invalidateCachedLength(); + } + } + + @Override + public double getDistanceSquared(Vector v) { + return delegate.getDistanceSquared(v); + } + + @Override + public double getLookupCost() { + return delegate.getLookupCost(); + } + + @Override + public double getIteratorAdvanceCost() { + return delegate.getIteratorAdvanceCost(); + } + + @Override + public boolean isAddConstantTime() { + return delegate.isAddConstantTime(); + } + + @Override + public double maxValue() { + return delegate.maxValue(); + } + + @Override + public int maxValueIndex() { + return delegate.maxValueIndex(); + } + + @Override + public double minValue() { + return delegate.minValue(); + } + + @Override + public int minValueIndex() { + return delegate.minValueIndex(); + } + + @Override + public Vector plus(double x) { + return delegate.plus(x); + } + + @Override + public Vector plus(Vector x) { + return delegate.plus(x); + } + + @Override + public void set(int index, double value) { + delegate.set(index, value); + } + + @Override + public Vector times(double x) { + return delegate.times(x); + } + + @Override + public Vector times(Vector x) { + return delegate.times(x); + } + + @Override + public double zSum() { + return delegate.zSum(); + } + + @Override + public Vector assign(double value) { + delegate.assign(value); + return this; + } + + @Override + public Vector assign(double[] values) { + delegate.assign(values); + return this; + } + + @Override + public Vector assign(Vector other) { + delegate.assign(other); + return this; + } + + @Override + public Vector assign(DoubleDoubleFunction f, double y) { + delegate.assign(f, y); + return this; + } + + @Override + public Vector assign(DoubleFunction function) { + delegate.assign(function); + return this; + } + + @Override + public Vector assign(Vector other, DoubleDoubleFunction function) { + delegate.assign(other, function); + return this; + } + + @Override + public Matrix cross(Vector other) { + return delegate.cross(other); + } + + @Override + public int size() { + return delegate.size(); + } + + @Override + public String asFormatString() { + return delegate.asFormatString(); + } + + @Override + public int hashCode() { + return delegate.hashCode(); + } + + @SuppressWarnings("EqualsWhichDoesntCheckParameterClass") + @Override + public boolean equals(Object o) { + return delegate.equals(o); + } + + @Override + public String toString() { + return delegate.toString(); + } + + @Override + public boolean isDense() { + return delegate.isDense(); + } + + @Override + public boolean isSequentialAccess() { + return delegate.isSequentialAccess(); + } + + @Override + public double getQuick(int index) { + return delegate.getQuick(index); + } + + @Override + public Vector like() { + return new DelegatingVector(delegate.like()); + } + + @Override + public Vector like(int cardinality) { + return new DelegatingVector(delegate.like(cardinality)); + } + + @Override + public void setQuick(int index, double value) { + delegate.setQuick(index, value); + } + + @Override + public void incrementQuick(int index, double increment) { + delegate.incrementQuick(index, increment); + } + + @Override + public int getNumNondefaultElements() { + return delegate.getNumNondefaultElements(); + } + + @Override + public int getNumNonZeroElements() { + return delegate.getNumNonZeroElements(); + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/DenseMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/DenseMatrix.java b/core/src/main/java/org/apache/mahout/math/DenseMatrix.java new file mode 100644 index 0000000..eac449a --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/DenseMatrix.java @@ -0,0 +1,193 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import org.apache.mahout.math.flavor.MatrixFlavor; + +import java.util.Arrays; + +/** Matrix of doubles implemented using a 2-d array */ +public class DenseMatrix extends AbstractMatrix { + + private double[][] values; + + /** + * Construct a matrix from the given values + * + * @param values + * a double[][] + */ + public DenseMatrix(double[][] values) { + this(values, false); + } + + /** + * Construct a matrix from the given values + * + * @param values + * a double[][] + * @param shallowCopy directly use the supplied array? + */ + public DenseMatrix(double[][] values, boolean shallowCopy) { + super(values.length, values[0].length); + if (shallowCopy) { + this.values = values; + } else { + this.values = new double[values.length][]; + for (int i = 0; i < values.length; i++) { + this.values[i] = values[i].clone(); + } + } + } + + /** + * Constructs an empty matrix of the given size. + * @param rows The number of rows in the result. + * @param columns The number of columns in the result. + */ + public DenseMatrix(int rows, int columns) { + super(rows, columns); + this.values = new double[rows][columns]; + } + + /** + * Returns the backing array + * @return double[][] + */ + public double[][] getBackingStructure() { + return this.values; + } + + @Override + public Matrix clone() { + DenseMatrix clone = (DenseMatrix) super.clone(); + clone.values = new double[values.length][]; + for (int i = 0; i < values.length; i++) { + clone.values[i] = values[i].clone(); + } + return clone; + } + + @Override + public double getQuick(int row, int column) { + return values[row][column]; + } + + @Override + public Matrix like() { + return like(rowSize(), columnSize()); + } + + @Override + public Matrix like(int rows, int columns) { + return new DenseMatrix(rows, columns); + } + + @Override + public void setQuick(int row, int column, double value) { + values[row][column] = value; + } + + @Override + public Matrix viewPart(int[] offset, int[] size) { + int rowOffset = offset[ROW]; + int rowsRequested = size[ROW]; + int columnOffset = offset[COL]; + int columnsRequested = size[COL]; + + return viewPart(rowOffset, rowsRequested, columnOffset, columnsRequested); + } + + @Override + public Matrix viewPart(int rowOffset, int rowsRequested, int columnOffset, int columnsRequested) { + if (rowOffset < 0) { + throw new IndexException(rowOffset, rowSize()); + } + if (rowOffset + rowsRequested > rowSize()) { + throw new IndexException(rowOffset + rowsRequested, rowSize()); + } + if (columnOffset < 0) { + throw new IndexException(columnOffset, columnSize()); + } + if (columnOffset + columnsRequested > columnSize()) { + throw new IndexException(columnOffset + columnsRequested, columnSize()); + } + return new MatrixView(this, new int[]{rowOffset, columnOffset}, new int[]{rowsRequested, columnsRequested}); + } + + @Override + public Matrix assign(double value) { + for (int row = 0; row < rowSize(); row++) { + Arrays.fill(values[row], value); + } + return this; + } + + public Matrix assign(DenseMatrix matrix) { + // make sure the data field has the correct length + if (matrix.values[0].length != this.values[0].length || matrix.values.length != this.values.length) { + this.values = new double[matrix.values.length][matrix.values[0].length]; + } + // now copy the values + for (int i = 0; i < this.values.length; i++) { + System.arraycopy(matrix.values[i], 0, this.values[i], 0, this.values[0].length); + } + return this; + } + + @Override + public Matrix assignColumn(int column, Vector other) { + if (rowSize() != other.size()) { + throw new CardinalityException(rowSize(), other.size()); + } + if (column < 0 || column >= columnSize()) { + throw new IndexException(column, columnSize()); + } + for (int row = 0; row < rowSize(); row++) { + values[row][column] = other.getQuick(row); + } + return this; + } + + @Override + public Matrix assignRow(int row, Vector other) { + if (columnSize() != other.size()) { + throw new CardinalityException(columnSize(), other.size()); + } + if (row < 0 || row >= rowSize()) { + throw new IndexException(row, rowSize()); + } + for (int col = 0; col < columnSize(); col++) { + values[row][col] = other.getQuick(col); + } + return this; + } + + @Override + public Vector viewRow(int row) { + if (row < 0 || row >= rowSize()) { + throw new IndexException(row, rowSize()); + } + return new DenseVector(values[row], true); + } + + @Override + public MatrixFlavor getFlavor() { + return MatrixFlavor.DENSELIKE; + } +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/DenseSymmetricMatrix.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/DenseSymmetricMatrix.java b/core/src/main/java/org/apache/mahout/math/DenseSymmetricMatrix.java new file mode 100644 index 0000000..7252b9b --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/DenseSymmetricMatrix.java @@ -0,0 +1,62 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import org.apache.mahout.math.flavor.TraversingStructureEnum; + +/** + * Economy packaging for a dense symmetric in-core matrix. + */ +public class DenseSymmetricMatrix extends UpperTriangular { + public DenseSymmetricMatrix(int n) { + super(n); + } + + public DenseSymmetricMatrix(double[] data, boolean shallow) { + super(data, shallow); + } + + public DenseSymmetricMatrix(Vector data) { + super(data); + } + + public DenseSymmetricMatrix(UpperTriangular mx) { + super(mx); + } + + @Override + public double getQuick(int row, int column) { + if (column < row) { + int swap = row; + row = column; + column = swap; + } + return super.getQuick(row, column); + } + + @Override + public void setQuick(int row, int column, double value) { + if (column < row) { + int swap = row; + row = column; + column = swap; + } + super.setQuick(row, column, value); + } + +} http://git-wip-us.apache.org/repos/asf/mahout/blob/545648f6/core/src/main/java/org/apache/mahout/math/DenseVector.java ---------------------------------------------------------------------- diff --git a/core/src/main/java/org/apache/mahout/math/DenseVector.java b/core/src/main/java/org/apache/mahout/math/DenseVector.java new file mode 100644 index 0000000..3961966 --- /dev/null +++ b/core/src/main/java/org/apache/mahout/math/DenseVector.java @@ -0,0 +1,442 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one or more + * contributor license agreements. See the NOTICE file distributed with + * this work for additional information regarding copyright ownership. + * The ASF licenses this file to You under the Apache License, Version 2.0 + * (the "License"); you may not use this file except in compliance with + * the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.mahout.math; + +import java.util.Arrays; +import java.util.Iterator; +import java.util.NoSuchElementException; + +import com.google.common.base.Preconditions; + +/** Implements vector as an array of doubles */ +public class DenseVector extends AbstractVector { + + private double[] values; + + /** For serialization purposes only */ + public DenseVector() { + super(0); + } + + /** Construct a new instance using provided values + * @param values - array of values + */ + public DenseVector(double[] values) { + this(values, false); + } + + public DenseVector(double[] values, boolean shallowCopy) { + super(values.length); + this.values = shallowCopy ? values : values.clone(); + } + + public DenseVector(DenseVector values, boolean shallowCopy) { + this(values.values, shallowCopy); + } + + /** Construct a new instance of the given cardinality + * @param cardinality - number of values in the vector + */ + public DenseVector(int cardinality) { + super(cardinality); + this.values = new double[cardinality]; + } + + /** + * Copy-constructor (for use in turning a sparse vector into a dense one, for example) + * @param vector The vector to copy + */ + public DenseVector(Vector vector) { + super(vector.size()); + values = new double[vector.size()]; + for (Element e : vector.nonZeroes()) { + values[e.index()] = e.get(); + } + } + + @Override + public double dot(Vector x) { + if (!x.isDense()) { + return super.dot(x); + } else { + + int size = x.size(); + if (values.length != size) { + throw new CardinalityException(values.length, size); + } + + double sum = 0; + for (int n = 0; n < size; n++) { + sum += values[n] * x.getQuick(n); + } + return sum; + } + } + + @Override + protected Matrix matrixLike(int rows, int columns) { + return new DenseMatrix(rows, columns); + } + + @SuppressWarnings("CloneDoesntCallSuperClone") + @Override + public DenseVector clone() { + return new DenseVector(values.clone()); + } + + /** + * @return true + */ + @Override + public boolean isDense() { + return true; + } + + /** + * @return true + */ + @Override + public boolean isSequentialAccess() { + return true; + } + + @Override + protected double dotSelf() { + double result = 0.0; + int max = size(); + for (int i = 0; i < max; i++) { + result += values[i] * values[i]; + } + return result; + } + + @Override + public double getQuick(int index) { + return values[index]; + } + + @Override + public DenseVector like() { + return new DenseVector(size()); + } + + @Override + public Vector like(int cardinality) { + return new DenseVector(cardinality); + } + + @Override + public void setQuick(int index, double value) { + invalidateCachedLength(); + values[index] = value; + } + + @Override + public void incrementQuick(int index, double increment) { + invalidateCachedLength(); + values[index] += increment; + } + + @Override + public Vector assign(double value) { + invalidateCachedLength(); + Arrays.fill(values, value); + return this; + } + + @Override + public int getNumNondefaultElements() { + return values.length; + } + + @Override + public int getNumNonZeroElements() { + int numNonZeros = 0; + for (int index = 0; index < values.length; index++) { + if (values[index] != 0) { + numNonZeros++; + } + } + return numNonZeros; + } + + public Vector assign(DenseVector vector) { + // make sure the data field has the correct length + if (vector.values.length != this.values.length) { + this.values = new double[vector.values.length]; + } + // now copy the values + System.arraycopy(vector.values, 0, this.values, 0, this.values.length); + return this; + } + + @Override + public void mergeUpdates(OrderedIntDoubleMapping updates) { + int numUpdates = updates.getNumMappings(); + int[] indices = updates.getIndices(); + double[] values = updates.getValues(); + for (int i = 0; i < numUpdates; ++i) { + this.values[indices[i]] = values[i]; + } + } + + @Override + public Vector viewPart(int offset, int length) { + if (offset < 0) { + throw new IndexException(offset, size()); + } + if (offset + length > size()) { + throw new IndexException(offset + length, size()); + } + return new DenseVectorView(this, offset, length); + } + + @Override + public double getLookupCost() { + return 1; + } + + @Override + public double getIteratorAdvanceCost() { + return 1; + } + + @Override + public boolean isAddConstantTime() { + return true; + } + + /** + * Returns an iterator that traverses this Vector from 0 to cardinality-1, in that order. + */ + @Override + public Iterator<Element> iterateNonZero() { + return new NonDefaultIterator(); + } + + @Override + public Iterator<Element> iterator() { + return new AllIterator(); + } + + @Override + public boolean equals(Object o) { + if (o instanceof DenseVector) { + // Speedup for DenseVectors + return Arrays.equals(values, ((DenseVector) o).values); + } + return super.equals(o); + } + + public void addAll(Vector v) { + if (size() != v.size()) { + throw new CardinalityException(size(), v.size()); + } + + for (Element element : v.nonZeroes()) { + values[element.index()] += element.get(); + } + } + + private final class NonDefaultIterator implements Iterator<Element> { + private final DenseElement element = new DenseElement(); + private int index = -1; + private int lookAheadIndex = -1; + + @Override + public boolean hasNext() { + if (lookAheadIndex == index) { // User calls hasNext() after a next() + lookAhead(); + } // else user called hasNext() repeatedly. + return lookAheadIndex < size(); + } + + private void lookAhead() { + lookAheadIndex++; + while (lookAheadIndex < size() && values[lookAheadIndex] == 0.0) { + lookAheadIndex++; + } + } + + @Override + public Element next() { + if (lookAheadIndex == index) { // If user called next() without checking hasNext(). + lookAhead(); + } + + Preconditions.checkState(lookAheadIndex > index); + index = lookAheadIndex; + + if (index >= size()) { // If the end is reached. + throw new NoSuchElementException(); + } + + element.index = index; + return element; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private final class AllIterator implements Iterator<Element> { + private final DenseElement element = new DenseElement(); + + private AllIterator() { + element.index = -1; + } + + @Override + public boolean hasNext() { + return element.index + 1 < size(); + } + + @Override + public Element next() { + if (element.index + 1 >= size()) { // If the end is reached. + throw new NoSuchElementException(); + } + element.index++; + return element; + } + + @Override + public void remove() { + throw new UnsupportedOperationException(); + } + } + + private final class DenseElement implements Element { + int index; + + @Override + public double get() { + return values[index]; + } + + @Override + public int index() { + return index; + } + + @Override + public void set(double value) { + invalidateCachedLength(); + values[index] = value; + } + } + + private final class DenseVectorView extends VectorView { + + public DenseVectorView(Vector vector, int offset, int cardinality) { + super(vector, offset, cardinality); + } + + @Override + public double dot(Vector x) { + + // Apply custom dot kernels for pairs of dense vectors or their views to reduce + // view indirection. + if (x instanceof DenseVectorView) { + + if (size() != x.size()) + throw new IllegalArgumentException("Cardinality mismatch during dot(x,y)."); + + DenseVectorView xv = (DenseVectorView) x; + double[] thisValues = ((DenseVector) vector).values; + double[] thatValues = ((DenseVector) xv.vector).values; + int untilOffset = offset + size(); + + int i, j; + double sum = 0.0; + + // Provoking SSE + int until4 = offset + (size() & ~3); + for ( + i = offset, j = xv.offset; + i < until4; + i += 4, j += 4 + ) { + sum += thisValues[i] * thatValues[j] + + thisValues[i + 1] * thatValues[j + 1] + + thisValues[i + 2] * thatValues[j + 2] + + thisValues[i + 3] * thatValues[j + 3]; + } + + // Picking up the slack + for ( + i = offset, j = xv.offset; + i < untilOffset; + ) { + sum += thisValues[i++] * thatValues[j++]; + } + return sum; + + } else if (x instanceof DenseVector ) { + + if (size() != x.size()) + throw new IllegalArgumentException("Cardinality mismatch during dot(x,y)."); + + DenseVector xv = (DenseVector) x; + double[] thisValues = ((DenseVector) vector).values; + double[] thatValues = xv.values; + int untilOffset = offset + size(); + + int i, j; + double sum = 0.0; + + // Provoking SSE + int until4 = offset + (size() & ~3); + for ( + i = offset, j = 0; + i < until4; + i += 4, j += 4 + ) { + sum += thisValues[i] * thatValues[j] + + thisValues[i + 1] * thatValues[j + 1] + + thisValues[i + 2] * thatValues[j + 2] + + thisValues[i + 3] * thatValues[j + 3]; + } + + // Picking up slack + for ( ; + i < untilOffset; + ) { + sum += thisValues[i++] * thatValues[j++]; + } + return sum; + + } else { + return super.dot(x); + } + } + + @Override + public Vector viewPart(int offset, int length) { + if (offset < 0) { + throw new IndexException(offset, size()); + } + if (offset + length > size()) { + throw new IndexException(offset + length, size()); + } + return new DenseVectorView(vector, offset + this.offset, length); + } + } +}
