http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/core/Utils.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/core/Utils.java b/samoa-api/src/main/java/org/apache/samoa/moa/core/Utils.java new file mode 100644 index 0000000..e45db02 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/core/Utils.java @@ -0,0 +1,1982 @@ +/* + * Utils.java + + * + */ + +package org.apache.samoa.moa.core; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.io.BufferedReader; +import java.io.BufferedWriter; +import java.io.File; +import java.io.FileInputStream; +import java.io.FileReader; +import java.io.FileWriter; +import java.lang.reflect.Array; +import java.net.URL; +import java.text.BreakIterator; +import java.util.Enumeration; +import java.util.Properties; +import java.util.Random; +import java.util.Vector; + +/** + * Class implementing some simple utility methods. + * + * @author Eibe Frank + * @author Yong Wang + * @author Len Trigg + * @author Julien Prados + * @version $Revision: 8080 $ + */ +public final class Utils { + + /** The natural logarithm of 2. */ + public static double log2 = Math.log(2); + + /** The small deviation allowed in double comparisons. */ + public static double SMALL = 1e-6; + + /** + * Tests if the given value codes "missing". + * + * @param val + * the value to be tested + * @return true if val codes "missing" + */ + public static boolean isMissingValue(double val) { + + return Double.isNaN(val); + } + + /** + * Returns the value used to code a missing value. Note that equality tests on this value will always return false, so + * use isMissingValue(double val) for testing.. + * + * @return the value used as missing value. + */ + public static double missingValue() { + + return Double.NaN; + } + + /** + * Casting an object without "unchecked" compile-time warnings. Use only when absolutely necessary (e.g. when using + * clone()). + */ + @SuppressWarnings("unchecked") + public static <T> T cast(Object x) { + return (T) x; + } + + /** + * Returns the correlation coefficient of two double vectors. + * + * @param y1 + * double vector 1 + * @param y2 + * double vector 2 + * @param n + * the length of two double vectors + * @return the correlation coefficient + */ + public static final double correlation(double y1[], double y2[], int n) { + + int i; + double av1 = 0.0, av2 = 0.0, y11 = 0.0, y22 = 0.0, y12 = 0.0, c; + + if (n <= 1) { + return 1.0; + } + for (i = 0; i < n; i++) { + av1 += y1[i]; + av2 += y2[i]; + } + av1 /= (double) n; + av2 /= (double) n; + for (i = 0; i < n; i++) { + y11 += (y1[i] - av1) * (y1[i] - av1); + y22 += (y2[i] - av2) * (y2[i] - av2); + y12 += (y1[i] - av1) * (y2[i] - av2); + } + if (y11 * y22 == 0.0) { + c = 1.0; + } else { + c = y12 / Math.sqrt(Math.abs(y11 * y22)); + } + + return c; + } + + /** + * Removes all occurrences of a string from another string. + * + * @param inString + * the string to remove substrings from. + * @param substring + * the substring to remove. + * @return the input string with occurrences of substring removed. + */ + public static String removeSubstring(String inString, String substring) { + + StringBuffer result = new StringBuffer(); + int oldLoc = 0, loc = 0; + while ((loc = inString.indexOf(substring, oldLoc)) != -1) { + result.append(inString.substring(oldLoc, loc)); + oldLoc = loc + substring.length(); + } + result.append(inString.substring(oldLoc)); + return result.toString(); + } + + /** + * Replaces with a new string, all occurrences of a string from another string. + * + * @param inString + * the string to replace substrings in. + * @param subString + * the substring to replace. + * @param replaceString + * the replacement substring + * @return the input string with occurrences of substring replaced. + */ + public static String replaceSubstring(String inString, String subString, + String replaceString) { + + StringBuffer result = new StringBuffer(); + int oldLoc = 0, loc = 0; + while ((loc = inString.indexOf(subString, oldLoc)) != -1) { + result.append(inString.substring(oldLoc, loc)); + result.append(replaceString); + oldLoc = loc + subString.length(); + } + result.append(inString.substring(oldLoc)); + return result.toString(); + } + + /** + * Pads a string to a specified length, inserting spaces on the left as required. If the string is too long, + * characters are removed (from the right). + * + * @param inString + * the input string + * @param length + * the desired length of the output string + * @return the output string + */ + public static String padLeft(String inString, int length) { + + return fixStringLength(inString, length, false); + } + + /** + * Pads a string to a specified length, inserting spaces on the right as required. If the string is too long, + * characters are removed (from the right). + * + * @param inString + * the input string + * @param length + * the desired length of the output string + * @return the output string + */ + public static String padRight(String inString, int length) { + + return fixStringLength(inString, length, true); + } + + /** + * Pads a string to a specified length, inserting spaces as required. If the string is too long, characters are + * removed (from the right). + * + * @param inString + * the input string + * @param length + * the desired length of the output string + * @param right + * true if inserted spaces should be added to the right + * @return the output string + */ + private static/* @pure@ */String fixStringLength(String inString, int length, + boolean right) { + + if (inString.length() < length) { + while (inString.length() < length) { + inString = (right ? inString.concat(" ") : " ".concat(inString)); + } + } else if (inString.length() > length) { + inString = inString.substring(0, length); + } + return inString; + } + + /** + * Rounds a double and converts it into String. + * + * @param value + * the double value + * @param afterDecimalPoint + * the (maximum) number of digits permitted after the decimal point + * @return the double as a formatted string + */ + public static/* @pure@ */String doubleToString(double value, int afterDecimalPoint) { + + StringBuffer stringBuffer; + double temp; + int dotPosition; + long precisionValue; + + temp = value * Math.pow(10.0, afterDecimalPoint); + if (Math.abs(temp) < Long.MAX_VALUE) { + precisionValue = (temp > 0) ? (long) (temp + 0.5) + : -(long) (Math.abs(temp) + 0.5); + if (precisionValue == 0) { + stringBuffer = new StringBuffer(String.valueOf(0)); + } else { + stringBuffer = new StringBuffer(String.valueOf(precisionValue)); + } + if (afterDecimalPoint == 0) { + return stringBuffer.toString(); + } + dotPosition = stringBuffer.length() - afterDecimalPoint; + while (((precisionValue < 0) && (dotPosition < 1)) || + (dotPosition < 0)) { + if (precisionValue < 0) { + stringBuffer.insert(1, '0'); + } else { + stringBuffer.insert(0, '0'); + } + dotPosition++; + } + stringBuffer.insert(dotPosition, '.'); + if ((precisionValue < 0) && (stringBuffer.charAt(1) == '.')) { + stringBuffer.insert(1, '0'); + } else if (stringBuffer.charAt(0) == '.') { + stringBuffer.insert(0, '0'); + } + int currentPos = stringBuffer.length() - 1; + while ((currentPos > dotPosition) && + (stringBuffer.charAt(currentPos) == '0')) { + stringBuffer.setCharAt(currentPos--, ' '); + } + if (stringBuffer.charAt(currentPos) == '.') { + stringBuffer.setCharAt(currentPos, ' '); + } + + return stringBuffer.toString().trim(); + } + return new String("" + value); + } + + /** + * Rounds a double and converts it into a formatted decimal-justified String. Trailing 0's are replaced with spaces. + * + * @param value + * the double value + * @param width + * the width of the string + * @param afterDecimalPoint + * the number of digits after the decimal point + * @return the double as a formatted string + */ + public static/* @pure@ */String doubleToString(double value, int width, + int afterDecimalPoint) { + + String tempString = doubleToString(value, afterDecimalPoint); + char[] result; + int dotPosition; + + if ((afterDecimalPoint >= width) + || (tempString.indexOf('E') != -1)) { // Protects sci notation + return tempString; + } + + // Initialize result + result = new char[width]; + for (int i = 0; i < result.length; i++) { + result[i] = ' '; + } + + if (afterDecimalPoint > 0) { + // Get position of decimal point and insert decimal point + dotPosition = tempString.indexOf('.'); + if (dotPosition == -1) { + dotPosition = tempString.length(); + } else { + result[width - afterDecimalPoint - 1] = '.'; + } + } else { + dotPosition = tempString.length(); + } + + int offset = width - afterDecimalPoint - dotPosition; + if (afterDecimalPoint > 0) { + offset--; + } + + // Not enough room to decimal align within the supplied width + if (offset < 0) { + return tempString; + } + + // Copy characters before decimal point + for (int i = 0; i < dotPosition; i++) { + result[offset + i] = tempString.charAt(i); + } + + // Copy characters after decimal point + for (int i = dotPosition + 1; i < tempString.length(); i++) { + result[offset + i] = tempString.charAt(i); + } + + return new String(result); + } + + /** + * Returns the basic class of an array class (handles multi-dimensional arrays). + * + * @param c + * the array to inspect + * @return the class of the innermost elements + */ + public static Class getArrayClass(Class c) { + if (c.getComponentType().isArray()) + return getArrayClass(c.getComponentType()); + else + return c.getComponentType(); + } + + /** + * Returns the dimensions of the given array. Even though the parameter is of type "Object" one can hand over primitve + * arrays, e.g. int[3] or double[2][4]. + * + * @param array + * the array to determine the dimensions for + * @return the dimensions of the array + */ + public static int getArrayDimensions(Class array) { + if (array.getComponentType().isArray()) + return 1 + getArrayDimensions(array.getComponentType()); + else + return 1; + } + + /** + * Returns the dimensions of the given array. Even though the parameter is of type "Object" one can hand over primitve + * arrays, e.g. int[3] or double[2][4]. + * + * @param array + * the array to determine the dimensions for + * @return the dimensions of the array + */ + public static int getArrayDimensions(Object array) { + return getArrayDimensions(array.getClass()); + } + + /** + * Returns the given Array in a string representation. Even though the parameter is of type "Object" one can hand over + * primitve arrays, e.g. int[3] or double[2][4]. + * + * @param array + * the array to return in a string representation + * @return the array as string + */ + public static String arrayToString(Object array) { + String result; + int dimensions; + int i; + + result = ""; + dimensions = getArrayDimensions(array); + + if (dimensions == 0) { + result = "null"; + } + else if (dimensions == 1) { + for (i = 0; i < Array.getLength(array); i++) { + if (i > 0) + result += ","; + if (Array.get(array, i) == null) + result += "null"; + else + result += Array.get(array, i).toString(); + } + } + else { + for (i = 0; i < Array.getLength(array); i++) { + if (i > 0) + result += ","; + result += "[" + arrayToString(Array.get(array, i)) + "]"; + } + } + + return result; + } + + /** + * Tests if a is equal to b. + * + * @param a + * a double + * @param b + * a double + */ + public static/* @pure@ */boolean eq(double a, double b) { + + return (a - b < SMALL) && (b - a < SMALL); + } + + /** + * Checks if the given array contains any non-empty options. + * + * @param options + * an array of strings + * @exception Exception + * if there are any non-empty options + */ + public static void checkForRemainingOptions(String[] options) + throws Exception { + + int illegalOptionsFound = 0; + StringBuffer text = new StringBuffer(); + + if (options == null) { + return; + } + for (int i = 0; i < options.length; i++) { + if (options[i].length() > 0) { + illegalOptionsFound++; + text.append(options[i] + ' '); + } + } + if (illegalOptionsFound > 0) { + throw new Exception("Illegal options: " + text); + } + } + + /** + * Checks if the given array contains the flag "-Char". Stops searching at the first marker "--". If the flag is + * found, it is replaced with the empty string. + * + * @param flag + * the character indicating the flag. + * @param options + * the array of strings containing all the options. + * @return true if the flag was found + * @exception Exception + * if an illegal option was found + */ + public static boolean getFlag(char flag, String[] options) + throws Exception { + + return getFlag("" + flag, options); + } + + /** + * Checks if the given array contains the flag "-String". Stops searching at the first marker "--". If the flag is + * found, it is replaced with the empty string. + * + * @param flag + * the String indicating the flag. + * @param options + * the array of strings containing all the options. + * @return true if the flag was found + * @exception Exception + * if an illegal option was found + */ + public static boolean getFlag(String flag, String[] options) + throws Exception { + + int pos = getOptionPos(flag, options); + + if (pos > -1) + options[pos] = ""; + + return (pos > -1); + } + + /** + * Gets an option indicated by a flag "-Char" from the given array of strings. Stops searching at the first marker + * "--". Replaces flag and option with empty strings. + * + * @param flag + * the character indicating the option. + * @param options + * the array of strings containing all the options. + * @return the indicated option or an empty string + * @exception Exception + * if the option indicated by the flag can't be found + */ + public static/* @non_null@ */String getOption(char flag, String[] options) + throws Exception { + + return getOption("" + flag, options); + } + + /** + * Gets an option indicated by a flag "-String" from the given array of strings. Stops searching at the first marker + * "--". Replaces flag and option with empty strings. + * + * @param flag + * the String indicating the option. + * @param options + * the array of strings containing all the options. + * @return the indicated option or an empty string + * @exception Exception + * if the option indicated by the flag can't be found + */ + public static/* @non_null@ */String getOption(String flag, String[] options) + throws Exception { + + String newString; + int i = getOptionPos(flag, options); + + if (i > -1) { + if (options[i].equals("-" + flag)) { + if (i + 1 == options.length) { + throw new Exception("No value given for -" + flag + " option."); + } + options[i] = ""; + newString = new String(options[i + 1]); + options[i + 1] = ""; + return newString; + } + if (options[i].charAt(1) == '-') { + return ""; + } + } + + return ""; + } + + /** + * Gets the index of an option or flag indicated by a flag "-Char" from the given array of strings. Stops searching at + * the first marker "--". + * + * @param flag + * the character indicating the option. + * @param options + * the array of strings containing all the options. + * @return the position if found, or -1 otherwise + */ + public static int getOptionPos(char flag, String[] options) { + return getOptionPos("" + flag, options); + } + + /** + * Gets the index of an option or flag indicated by a flag "-String" from the given array of strings. Stops searching + * at the first marker "--". + * + * @param flag + * the String indicating the option. + * @param options + * the array of strings containing all the options. + * @return the position if found, or -1 otherwise + */ + public static int getOptionPos(String flag, String[] options) { + if (options == null) + return -1; + + for (int i = 0; i < options.length; i++) { + if ((options[i].length() > 0) && (options[i].charAt(0) == '-')) { + // Check if it is a negative number + try { + Double.valueOf(options[i]); + } catch (NumberFormatException e) { + // found? + if (options[i].equals("-" + flag)) + return i; + // did we reach "--"? + if (options[i].charAt(1) == '-') + return -1; + } + } + } + + return -1; + } + + /** + * Quotes a string if it contains special characters. + * + * The following rules are applied: + * + * A character is backquoted version of it is one of <tt>" ' % \ \n \r \t</tt> . + * + * A string is enclosed within single quotes if a character has been backquoted using the previous rule above or + * contains <tt>{ }</tt> or is exactly equal to the strings <tt>, ? space or ""</tt> (empty string). + * + * A quoted question mark distinguishes it from the missing value which is represented as an unquoted question mark in + * arff files. + * + * @param string + * the string to be quoted + * @return the string (possibly quoted) + * @see #unquote(String) + */ + public static/* @pure@ */String quote(String string) { + boolean quote = false; + + // backquote the following characters + if ((string.indexOf('\n') != -1) || (string.indexOf('\r') != -1) || + (string.indexOf('\'') != -1) || (string.indexOf('"') != -1) || + (string.indexOf('\\') != -1) || + (string.indexOf('\t') != -1) || (string.indexOf('%') != -1) || + (string.indexOf('\u001E') != -1)) { + string = backQuoteChars(string); + quote = true; + } + + // Enclose the string in 's if the string contains a recently added + // backquote or contains one of the following characters. + if ((quote == true) || + (string.indexOf('{') != -1) || (string.indexOf('}') != -1) || + (string.indexOf(',') != -1) || (string.equals("?")) || + (string.indexOf(' ') != -1) || (string.equals(""))) { + string = ("'".concat(string)).concat("'"); + } + + return string; + } + + /** + * unquotes are previously quoted string (but only if necessary), i.e., it removes the single quotes around it. + * Inverse to quote(String). + * + * @param string + * the string to process + * @return the unquoted string + * @see #quote(String) + */ + public static String unquote(String string) { + if (string.startsWith("'") && string.endsWith("'")) { + string = string.substring(1, string.length() - 1); + + if ((string.indexOf("\\n") != -1) || (string.indexOf("\\r") != -1) || + (string.indexOf("\\'") != -1) || (string.indexOf("\\\"") != -1) || + (string.indexOf("\\\\") != -1) || + (string.indexOf("\\t") != -1) || (string.indexOf("\\%") != -1) || + (string.indexOf("\\u001E") != -1)) { + string = unbackQuoteChars(string); + } + } + + return string; + } + + /** + * Converts carriage returns and new lines in a string into \r and \n. Backquotes the following characters: ` " \ \t + * and % + * + * @param string + * the string + * @return the converted string + * @see #unbackQuoteChars(String) + */ + public static/* @pure@ */String backQuoteChars(String string) { + + int index; + StringBuffer newStringBuffer; + + // replace each of the following characters with the backquoted version + char charsFind[] = { '\\', '\'', '\t', '\n', '\r', '"', '%', + '\u001E' }; + String charsReplace[] = { "\\\\", "\\'", "\\t", "\\n", "\\r", "\\\"", "\\%", + "\\u001E" }; + for (int i = 0; i < charsFind.length; i++) { + if (string.indexOf(charsFind[i]) != -1) { + newStringBuffer = new StringBuffer(); + while ((index = string.indexOf(charsFind[i])) != -1) { + if (index > 0) { + newStringBuffer.append(string.substring(0, index)); + } + newStringBuffer.append(charsReplace[i]); + if ((index + 1) < string.length()) { + string = string.substring(index + 1); + } else { + string = ""; + } + } + newStringBuffer.append(string); + string = newStringBuffer.toString(); + } + } + + return string; + } + + /** + * Converts carriage returns and new lines in a string into \r and \n. + * + * @param string + * the string + * @return the converted string + */ + public static String convertNewLines(String string) { + int index; + + // Replace with \n + StringBuffer newStringBuffer = new StringBuffer(); + while ((index = string.indexOf('\n')) != -1) { + if (index > 0) { + newStringBuffer.append(string.substring(0, index)); + } + newStringBuffer.append('\\'); + newStringBuffer.append('n'); + if ((index + 1) < string.length()) { + string = string.substring(index + 1); + } else { + string = ""; + } + } + newStringBuffer.append(string); + string = newStringBuffer.toString(); + + // Replace with \r + newStringBuffer = new StringBuffer(); + while ((index = string.indexOf('\r')) != -1) { + if (index > 0) { + newStringBuffer.append(string.substring(0, index)); + } + newStringBuffer.append('\\'); + newStringBuffer.append('r'); + if ((index + 1) < string.length()) { + string = string.substring(index + 1); + } else { + string = ""; + } + } + newStringBuffer.append(string); + return newStringBuffer.toString(); + } + + /** + * Reverts \r and \n in a string into carriage returns and new lines. + * + * @param string + * the string + * @return the converted string + */ + public static String revertNewLines(String string) { + int index; + + // Replace with \n + StringBuffer newStringBuffer = new StringBuffer(); + while ((index = string.indexOf("\\n")) != -1) { + if (index > 0) { + newStringBuffer.append(string.substring(0, index)); + } + newStringBuffer.append('\n'); + if ((index + 2) < string.length()) { + string = string.substring(index + 2); + } else { + string = ""; + } + } + newStringBuffer.append(string); + string = newStringBuffer.toString(); + + // Replace with \r + newStringBuffer = new StringBuffer(); + while ((index = string.indexOf("\\r")) != -1) { + if (index > 0) { + newStringBuffer.append(string.substring(0, index)); + } + newStringBuffer.append('\r'); + if ((index + 2) < string.length()) { + string = string.substring(index + 2); + } else { + string = ""; + } + } + newStringBuffer.append(string); + + return newStringBuffer.toString(); + } + + /** + * Returns the secondary set of options (if any) contained in the supplied options array. The secondary set is defined + * to be any options after the first "--". These options are removed from the original options array. + * + * @param options + * the input array of options + * @return the array of secondary options + */ + public static String[] partitionOptions(String[] options) { + + for (int i = 0; i < options.length; i++) { + if (options[i].equals("--")) { + options[i++] = ""; + String[] result = new String[options.length - i]; + for (int j = i; j < options.length; j++) { + result[j - i] = options[j]; + options[j] = ""; + } + return result; + } + } + return new String[0]; + } + + /** + * The inverse operation of backQuoteChars(). Converts back-quoted carriage returns and new lines in a string to the + * corresponding character ('\r' and '\n'). Also "un"-back-quotes the following characters: ` " \ \t and % + * + * @param string + * the string + * @return the converted string + * @see #backQuoteChars(String) + */ + public static String unbackQuoteChars(String string) { + + int index; + StringBuffer newStringBuffer; + + // replace each of the following characters with the backquoted version + String charsFind[] = { "\\\\", "\\'", "\\t", "\\n", "\\r", "\\\"", "\\%", + "\\u001E" }; + char charsReplace[] = { '\\', '\'', '\t', '\n', '\r', '"', '%', + '\u001E' }; + int pos[] = new int[charsFind.length]; + int curPos; + + String str = new String(string); + newStringBuffer = new StringBuffer(); + while (str.length() > 0) { + // get positions and closest character to replace + curPos = str.length(); + index = -1; + for (int i = 0; i < pos.length; i++) { + pos[i] = str.indexOf(charsFind[i]); + if ((pos[i] > -1) && (pos[i] < curPos)) { + index = i; + curPos = pos[i]; + } + } + + // replace character if found, otherwise finished + if (index == -1) { + newStringBuffer.append(str); + str = ""; + } + else { + newStringBuffer.append(str.substring(0, pos[index])); + newStringBuffer.append(charsReplace[index]); + str = str.substring(pos[index] + charsFind[index].length()); + } + } + + return newStringBuffer.toString(); + } + + /** + * Split up a string containing options into an array of strings, one for each option. + * + * @param quotedOptionString + * the string containing the options + * @return the array of options + * @throws Exception + * in case of an unterminated string, unknown character or a parse error + */ + public static String[] splitOptions(String quotedOptionString) throws Exception { + + Vector<String> optionsVec = new Vector<String>(); + String str = new String(quotedOptionString); + int i; + + while (true) { + + // trimLeft + i = 0; + while ((i < str.length()) && (Character.isWhitespace(str.charAt(i)))) + i++; + str = str.substring(i); + + // stop when str is empty + if (str.length() == 0) + break; + + // if str start with a double quote + if (str.charAt(0) == '"') { + + // find the first not anti-slached double quote + i = 1; + while (i < str.length()) { + if (str.charAt(i) == str.charAt(0)) + break; + if (str.charAt(i) == '\\') { + i += 1; + if (i >= str.length()) + throw new Exception("String should not finish with \\"); + } + i += 1; + } + if (i >= str.length()) + throw new Exception("Quote parse error."); + + // add the founded string to the option vector (without quotes) + String optStr = str.substring(1, i); + optStr = unbackQuoteChars(optStr); + optionsVec.addElement(optStr); + str = str.substring(i + 1); + } else { + // find first whiteSpace + i = 0; + while ((i < str.length()) && (!Character.isWhitespace(str.charAt(i)))) + i++; + + // add the founded string to the option vector + String optStr = str.substring(0, i); + optionsVec.addElement(optStr); + str = str.substring(i); + } + } + + // convert optionsVec to an array of String + String[] options = new String[optionsVec.size()]; + for (i = 0; i < optionsVec.size(); i++) { + options[i] = (String) optionsVec.elementAt(i); + } + return options; + } + + /** + * Joins all the options in an option array into a single string, as might be used on the command line. + * + * @param optionArray + * the array of options + * @return the string containing all options. + */ + public static String joinOptions(String[] optionArray) { + + String optionString = ""; + for (int i = 0; i < optionArray.length; i++) { + if (optionArray[i].equals("")) { + continue; + } + boolean escape = false; + for (int n = 0; n < optionArray[i].length(); n++) { + if (Character.isWhitespace(optionArray[i].charAt(n))) { + escape = true; + break; + } + } + if (escape) { + optionString += '"' + backQuoteChars(optionArray[i]) + '"'; + } else { + optionString += optionArray[i]; + } + optionString += " "; + } + return optionString.trim(); + } + + /** + * Computes entropy for an array of integers. + * + * @param counts + * array of counts + * @return - a log2 a - b log2 b - c log2 c + (a+b+c) log2 (a+b+c) when given array [a b c] + */ + public static/* @pure@ */double info(int counts[]) { + + int total = 0; + double x = 0; + for (int j = 0; j < counts.length; j++) { + x -= xlogx(counts[j]); + total += counts[j]; + } + return x + xlogx(total); + } + + /** + * Tests if a is smaller or equal to b. + * + * @param a + * a double + * @param b + * a double + */ + public static/* @pure@ */boolean smOrEq(double a, double b) { + + return (a - b < SMALL); + } + + /** + * Tests if a is greater or equal to b. + * + * @param a + * a double + * @param b + * a double + */ + public static/* @pure@ */boolean grOrEq(double a, double b) { + + return (b - a < SMALL); + } + + /** + * Tests if a is smaller than b. + * + * @param a + * a double + * @param b + * a double + */ + public static/* @pure@ */boolean sm(double a, double b) { + + return (b - a > SMALL); + } + + /** + * Tests if a is greater than b. + * + * @param a + * a double + * @param b + * a double + */ + public static/* @pure@ */boolean gr(double a, double b) { + + return (a - b > SMALL); + } + + /** + * Returns the kth-smallest value in the array. + * + * @param array + * the array of integers + * @param k + * the value of k + * @return the kth-smallest value + */ + public static double kthSmallestValue(int[] array, int k) { + + int[] index = new int[array.length]; + + for (int i = 0; i < index.length; i++) { + index[i] = i; + } + + return array[index[select(array, index, 0, array.length - 1, k)]]; + } + + /** + * Returns the kth-smallest value in the array + * + * @param array + * the array of double + * @param k + * the value of k + * @return the kth-smallest value + */ + public static double kthSmallestValue(double[] array, int k) { + + int[] index = new int[array.length]; + + for (int i = 0; i < index.length; i++) { + index[i] = i; + } + + return array[index[select(array, index, 0, array.length - 1, k)]]; + } + + /** + * Returns the logarithm of a for base 2. + * + * @param a + * a double + * @return the logarithm for base 2 + */ + public static/* @pure@ */double log2(double a) { + + return Math.log(a) / log2; + } + + /** + * Returns index of maximum element in a given array of doubles. First maximum is returned. + * + * @param doubles + * the array of doubles + * @return the index of the maximum element + */ + public static/* @pure@ */int maxIndex(double[] doubles) { + + double maximum = 0; + int maxIndex = 0; + + for (int i = 0; i < doubles.length; i++) { + if ((i == 0) || (doubles[i] > maximum)) { + maxIndex = i; + maximum = doubles[i]; + } + } + + return maxIndex; + } + + /** + * Returns index of maximum element in a given array of integers. First maximum is returned. + * + * @param ints + * the array of integers + * @return the index of the maximum element + */ + public static/* @pure@ */int maxIndex(int[] ints) { + + int maximum = 0; + int maxIndex = 0; + + for (int i = 0; i < ints.length; i++) { + if ((i == 0) || (ints[i] > maximum)) { + maxIndex = i; + maximum = ints[i]; + } + } + + return maxIndex; + } + + /** + * Computes the mean for an array of doubles. + * + * @param vector + * the array + * @return the mean + */ + public static/* @pure@ */double mean(double[] vector) { + + double sum = 0; + + if (vector.length == 0) { + return 0; + } + for (int i = 0; i < vector.length; i++) { + sum += vector[i]; + } + return sum / (double) vector.length; + } + + /** + * Returns index of minimum element in a given array of integers. First minimum is returned. + * + * @param ints + * the array of integers + * @return the index of the minimum element + */ + public static/* @pure@ */int minIndex(int[] ints) { + + int minimum = 0; + int minIndex = 0; + + for (int i = 0; i < ints.length; i++) { + if ((i == 0) || (ints[i] < minimum)) { + minIndex = i; + minimum = ints[i]; + } + } + + return minIndex; + } + + /** + * Returns index of minimum element in a given array of doubles. First minimum is returned. + * + * @param doubles + * the array of doubles + * @return the index of the minimum element + */ + public static/* @pure@ */int minIndex(double[] doubles) { + + double minimum = 0; + int minIndex = 0; + + for (int i = 0; i < doubles.length; i++) { + if ((i == 0) || (doubles[i] < minimum)) { + minIndex = i; + minimum = doubles[i]; + } + } + + return minIndex; + } + + /** + * Normalizes the doubles in the array by their sum. + * + * @param doubles + * the array of double + * @exception IllegalArgumentException + * if sum is Zero or NaN + */ + public static void normalize(double[] doubles) { + + double sum = 0; + for (int i = 0; i < doubles.length; i++) { + sum += doubles[i]; + } + normalize(doubles, sum); + } + + /** + * Normalizes the doubles in the array using the given value. + * + * @param doubles + * the array of double + * @param sum + * the value by which the doubles are to be normalized + * @exception IllegalArgumentException + * if sum is zero or NaN + */ + public static void normalize(double[] doubles, double sum) { + + if (Double.isNaN(sum)) { + throw new IllegalArgumentException("Can't normalize array. Sum is NaN."); + } + if (sum == 0) { + // Maybe this should just be a return. + throw new IllegalArgumentException("Can't normalize array. Sum is zero."); + } + for (int i = 0; i < doubles.length; i++) { + doubles[i] /= sum; + } + } + + /** + * Converts an array containing the natural logarithms of probabilities stored in a vector back into probabilities. + * The probabilities are assumed to sum to one. + * + * @param a + * an array holding the natural logarithms of the probabilities + * @return the converted array + */ + public static double[] logs2probs(double[] a) { + + double max = a[maxIndex(a)]; + double sum = 0.0; + + double[] result = new double[a.length]; + for (int i = 0; i < a.length; i++) { + result[i] = Math.exp(a[i] - max); + sum += result[i]; + } + + normalize(result, sum); + + return result; + } + + /** + * Returns the log-odds for a given probabilitiy. + * + * @param prob + * the probabilitiy + * + * @return the log-odds after the probability has been mapped to [Utils.SMALL, 1-Utils.SMALL] + */ + public static/* @pure@ */double probToLogOdds(double prob) { + + if (gr(prob, 1) || (sm(prob, 0))) { + throw new IllegalArgumentException("probToLogOdds: probability must " + + "be in [0,1] " + prob); + } + double p = SMALL + (1.0 - 2 * SMALL) * prob; + return Math.log(p / (1 - p)); + } + + /** + * Rounds a double to the next nearest integer value. The JDK version of it doesn't work properly. + * + * @param value + * the double value + * @return the resulting integer value + */ + public static/* @pure@ */int round(double value) { + + int roundedValue = value > 0 + ? (int) (value + 0.5) + : -(int) (Math.abs(value) + 0.5); + + return roundedValue; + } + + /** + * Rounds a double to the next nearest integer value in a probabilistic fashion (e.g. 0.8 has a 20% chance of being + * rounded down to 0 and a 80% chance of being rounded up to 1). In the limit, the average of the rounded numbers + * generated by this procedure should converge to the original double. + * + * @param value + * the double value + * @param rand + * the random number generator + * @return the resulting integer value + */ + public static int probRound(double value, Random rand) { + + if (value >= 0) { + double lower = Math.floor(value); + double prob = value - lower; + if (rand.nextDouble() < prob) { + return (int) lower + 1; + } else { + return (int) lower; + } + } else { + double lower = Math.floor(Math.abs(value)); + double prob = Math.abs(value) - lower; + if (rand.nextDouble() < prob) { + return -((int) lower + 1); + } else { + return -(int) lower; + } + } + } + + /** + * Rounds a double to the given number of decimal places. + * + * @param value + * the double value + * @param afterDecimalPoint + * the number of digits after the decimal point + * @return the double rounded to the given precision + */ + public static/* @pure@ */double roundDouble(double value, int afterDecimalPoint) { + + double mask = Math.pow(10.0, (double) afterDecimalPoint); + + return (double) (Math.round(value * mask)) / mask; + } + + /** + * Sorts a given array of integers in ascending order and returns an array of integers with the positions of the + * elements of the original array in the sorted array. The sort is stable. (Equal elements remain in their original + * order.) + * + * @param array + * this array is not changed by the method! + * @return an array of integers with the positions in the sorted array. + */ + public static/* @pure@ */int[] sort(int[] array) { + + int[] index = new int[array.length]; + int[] newIndex = new int[array.length]; + int[] helpIndex; + int numEqual; + + for (int i = 0; i < index.length; i++) { + index[i] = i; + } + quickSort(array, index, 0, array.length - 1); + + // Make sort stable + int i = 0; + while (i < index.length) { + numEqual = 1; + for (int j = i + 1; ((j < index.length) + && (array[index[i]] == array[index[j]])); j++) { + numEqual++; + } + if (numEqual > 1) { + helpIndex = new int[numEqual]; + for (int j = 0; j < numEqual; j++) { + helpIndex[j] = i + j; + } + quickSort(index, helpIndex, 0, numEqual - 1); + for (int j = 0; j < numEqual; j++) { + newIndex[i + j] = index[helpIndex[j]]; + } + i += numEqual; + } else { + newIndex[i] = index[i]; + i++; + } + } + return newIndex; + } + + /** + * Sorts a given array of doubles in ascending order and returns an array of integers with the positions of the + * elements of the original array in the sorted array. NOTE THESE CHANGES: the sort is no longer stable and it doesn't + * use safe floating-point comparisons anymore. Occurrences of Double.NaN are treated as Double.MAX_VALUE + * + * @param array + * this array is not changed by the method! + * @return an array of integers with the positions in the sorted array. + */ + public static/* @pure@ */int[] sort(/* @non_null@ */double[] array) { + + int[] index = new int[array.length]; + array = (double[]) array.clone(); + for (int i = 0; i < index.length; i++) { + index[i] = i; + if (Double.isNaN(array[i])) { + array[i] = Double.MAX_VALUE; + } + } + quickSort(array, index, 0, array.length - 1); + return index; + } + + /** + * Sorts a given array of doubles in ascending order and returns an array of integers with the positions of the + * elements of the original array in the sorted array. The sort is stable (Equal elements remain in their original + * order.) Occurrences of Double.NaN are treated as Double.MAX_VALUE + * + * @param array + * this array is not changed by the method! + * @return an array of integers with the positions in the sorted array. + */ + public static/* @pure@ */int[] stableSort(double[] array) { + + int[] index = new int[array.length]; + int[] newIndex = new int[array.length]; + int[] helpIndex; + int numEqual; + + array = (double[]) array.clone(); + for (int i = 0; i < index.length; i++) { + index[i] = i; + if (Double.isNaN(array[i])) { + array[i] = Double.MAX_VALUE; + } + } + quickSort(array, index, 0, array.length - 1); + + // Make sort stable + + int i = 0; + while (i < index.length) { + numEqual = 1; + for (int j = i + 1; ((j < index.length) && Utils.eq(array[index[i]], + array[index[j]])); j++) + numEqual++; + if (numEqual > 1) { + helpIndex = new int[numEqual]; + for (int j = 0; j < numEqual; j++) + helpIndex[j] = i + j; + quickSort(index, helpIndex, 0, numEqual - 1); + for (int j = 0; j < numEqual; j++) + newIndex[i + j] = index[helpIndex[j]]; + i += numEqual; + } else { + newIndex[i] = index[i]; + i++; + } + } + + return newIndex; + } + + /** + * Computes the variance for an array of doubles. + * + * @param vector + * the array + * @return the variance + */ + public static/* @pure@ */double variance(double[] vector) { + + double sum = 0, sumSquared = 0; + + if (vector.length <= 1) { + return 0; + } + for (int i = 0; i < vector.length; i++) { + sum += vector[i]; + sumSquared += (vector[i] * vector[i]); + } + double result = (sumSquared - (sum * sum / (double) vector.length)) / + (double) (vector.length - 1); + + // We don't like negative variance + if (result < 0) { + return 0; + } else { + return result; + } + } + + /** + * Computes the sum of the elements of an array of doubles. + * + * @param doubles + * the array of double + * @return the sum of the elements + */ + public static/* @pure@ */double sum(double[] doubles) { + + double sum = 0; + + for (int i = 0; i < doubles.length; i++) { + sum += doubles[i]; + } + return sum; + } + + /** + * Computes the sum of the elements of an array of integers. + * + * @param ints + * the array of integers + * @return the sum of the elements + */ + public static/* @pure@ */int sum(int[] ints) { + + int sum = 0; + + for (int i = 0; i < ints.length; i++) { + sum += ints[i]; + } + return sum; + } + + /** + * Returns c*log2(c) for a given integer value c. + * + * @param c + * an integer value + * @return c*log2(c) (but is careful to return 0 if c is 0) + */ + public static/* @pure@ */double xlogx(int c) { + + if (c == 0) { + return 0.0; + } + return c * Utils.log2((double) c); + } + + /** + * Partitions the instances around a pivot. Used by quicksort and kthSmallestValue. + * + * @param array + * the array of doubles to be sorted + * @param index + * the index into the array of doubles + * @param l + * the first index of the subset + * @param r + * the last index of the subset + * + * @return the index of the middle element + */ + private static int partition(double[] array, int[] index, int l, int r) { + + double pivot = array[index[(l + r) / 2]]; + int help; + + while (l < r) { + while ((array[index[l]] < pivot) && (l < r)) { + l++; + } + while ((array[index[r]] > pivot) && (l < r)) { + r--; + } + if (l < r) { + help = index[l]; + index[l] = index[r]; + index[r] = help; + l++; + r--; + } + } + if ((l == r) && (array[index[r]] > pivot)) { + r--; + } + + return r; + } + + /** + * Partitions the instances around a pivot. Used by quicksort and kthSmallestValue. + * + * @param array + * the array of integers to be sorted + * @param index + * the index into the array of integers + * @param l + * the first index of the subset + * @param r + * the last index of the subset + * + * @return the index of the middle element + */ + private static int partition(int[] array, int[] index, int l, int r) { + + double pivot = array[index[(l + r) / 2]]; + int help; + + while (l < r) { + while ((array[index[l]] < pivot) && (l < r)) { + l++; + } + while ((array[index[r]] > pivot) && (l < r)) { + r--; + } + if (l < r) { + help = index[l]; + index[l] = index[r]; + index[r] = help; + l++; + r--; + } + } + if ((l == r) && (array[index[r]] > pivot)) { + r--; + } + + return r; + } + + /** + * Implements quicksort according to Manber's "Introduction to Algorithms". + * + * @param array + * the array of doubles to be sorted + * @param index + * the index into the array of doubles + * @param left + * the first index of the subset to be sorted + * @param right + * the last index of the subset to be sorted + */ + // @ requires 0 <= first && first <= right && right < array.length; + // @ requires (\forall int i; 0 <= i && i < index.length; 0 <= index[i] && + // index[i] < array.length); + // @ requires array != index; + // assignable index; + private static void quickSort(/* @non_null@ */double[] array, /* @non_null@ */int[] index, + int left, int right) { + + if (left < right) { + int middle = partition(array, index, left, right); + quickSort(array, index, left, middle); + quickSort(array, index, middle + 1, right); + } + } + + /** + * Implements quicksort according to Manber's "Introduction to Algorithms". + * + * @param array + * the array of integers to be sorted + * @param index + * the index into the array of integers + * @param left + * the first index of the subset to be sorted + * @param right + * the last index of the subset to be sorted + */ + // @ requires 0 <= first && first <= right && right < array.length; + // @ requires (\forall int i; 0 <= i && i < index.length; 0 <= index[i] && + // index[i] < array.length); + // @ requires array != index; + // assignable index; + private static void quickSort(/* @non_null@ */int[] array, /* @non_null@ */int[] index, + int left, int right) { + + if (left < right) { + int middle = partition(array, index, left, right); + quickSort(array, index, left, middle); + quickSort(array, index, middle + 1, right); + } + } + + /** + * Implements computation of the kth-smallest element according to Manber's "Introduction to Algorithms". + * + * @param array + * the array of double + * @param index + * the index into the array of doubles + * @param left + * the first index of the subset + * @param right + * the last index of the subset + * @param k + * the value of k + * + * @return the index of the kth-smallest element + */ + // @ requires 0 <= first && first <= right && right < array.length; + private static int select(/* @non_null@ */double[] array, /* @non_null@ */int[] index, + int left, int right, int k) { + + if (left == right) { + return left; + } else { + int middle = partition(array, index, left, right); + if ((middle - left + 1) >= k) { + return select(array, index, left, middle, k); + } else { + return select(array, index, middle + 1, right, k - (middle - left + 1)); + } + } + } + + /** + * Converts a File's absolute path to a path relative to the user (ie start) directory. Includes an additional + * workaround for Cygwin, which doesn't like upper case drive letters. + * + * @param absolute + * the File to convert to relative path + * @return a File with a path that is relative to the user's directory + * @exception Exception + * if the path cannot be constructed + */ + public static File convertToRelativePath(File absolute) throws Exception { + File result; + String fileStr; + + result = null; + + // if we're running windows, it could be Cygwin + if (File.separator.equals("\\")) { + // Cygwin doesn't like upper case drives -> try lower case drive + try { + fileStr = absolute.getPath(); + fileStr = fileStr.substring(0, 1).toLowerCase() + + fileStr.substring(1); + result = createRelativePath(new File(fileStr)); + } catch (Exception e) { + // no luck with Cygwin workaround, convert it like it is + result = createRelativePath(absolute); + } + } + else { + result = createRelativePath(absolute); + } + + return result; + } + + /** + * Converts a File's absolute path to a path relative to the user (ie start) directory. + * + * @param absolute + * the File to convert to relative path + * @return a File with a path that is relative to the user's directory + * @exception Exception + * if the path cannot be constructed + */ + protected static File createRelativePath(File absolute) throws Exception { + File userDir = new File(System.getProperty("user.dir")); + String userPath = userDir.getAbsolutePath() + File.separator; + String targetPath = (new File(absolute.getParent())).getPath() + + File.separator; + String fileName = absolute.getName(); + StringBuffer relativePath = new StringBuffer(); + // relativePath.append("."+File.separator); + // System.err.println("User dir "+userPath); + // System.err.println("Target path "+targetPath); + + // file is in user dir (or subdir) + int subdir = targetPath.indexOf(userPath); + if (subdir == 0) { + if (userPath.length() == targetPath.length()) { + relativePath.append(fileName); + } else { + int ll = userPath.length(); + relativePath.append(targetPath.substring(ll)); + relativePath.append(fileName); + } + } else { + int sepCount = 0; + String temp = new String(userPath); + while (temp.indexOf(File.separator) != -1) { + int ind = temp.indexOf(File.separator); + sepCount++; + temp = temp.substring(ind + 1, temp.length()); + } + + String targetTemp = new String(targetPath); + String userTemp = new String(userPath); + int tcount = 0; + while (targetTemp.indexOf(File.separator) != -1) { + int ind = targetTemp.indexOf(File.separator); + int ind2 = userTemp.indexOf(File.separator); + String tpart = targetTemp.substring(0, ind + 1); + String upart = userTemp.substring(0, ind2 + 1); + if (tpart.compareTo(upart) != 0) { + if (tcount == 0) { + tcount = -1; + } + break; + } + tcount++; + targetTemp = targetTemp.substring(ind + 1, targetTemp.length()); + userTemp = userTemp.substring(ind2 + 1, userTemp.length()); + } + if (tcount == -1) { + // then target file is probably on another drive (under windows) + throw new Exception("Can't construct a path to file relative to user " + + "dir."); + } + if (targetTemp.indexOf(File.separator) == -1) { + targetTemp = ""; + } + for (int i = 0; i < sepCount - tcount; i++) { + relativePath.append(".." + File.separator); + } + relativePath.append(targetTemp + fileName); + } + // System.err.println("new path : "+relativePath.toString()); + return new File(relativePath.toString()); + } + + /** + * Implements computation of the kth-smallest element according to Manber's "Introduction to Algorithms". + * + * @param array + * the array of integers + * @param index + * the index into the array of integers + * @param left + * the first index of the subset + * @param right + * the last index of the subset + * @param k + * the value of k + * + * @return the index of the kth-smallest element + */ + // @ requires 0 <= first && first <= right && right < array.length; + private static int select(/* @non_null@ */int[] array, /* @non_null@ */int[] index, + int left, int right, int k) { + + if (left == right) { + return left; + } else { + int middle = partition(array, index, left, right); + if ((middle - left + 1) >= k) { + return select(array, index, left, middle, k); + } else { + return select(array, index, middle + 1, right, k - (middle - left + 1)); + } + } + } + + /** + * Breaks up the string, if wider than "columns" characters. + * + * @param s + * the string to process + * @param columns + * the width in columns + * @return the processed string + */ + public static String[] breakUp(String s, int columns) { + Vector<String> result; + String line; + BreakIterator boundary; + int boundaryStart; + int boundaryEnd; + String word; + String punctuation; + int i; + String[] lines; + + result = new Vector<String>(); + punctuation = " .,;:!?'\""; + lines = s.split("\n"); + + for (i = 0; i < lines.length; i++) { + boundary = BreakIterator.getWordInstance(); + boundary.setText(lines[i]); + boundaryStart = boundary.first(); + boundaryEnd = boundary.next(); + line = ""; + + while (boundaryEnd != BreakIterator.DONE) { + word = lines[i].substring(boundaryStart, boundaryEnd); + if (line.length() >= columns) { + if (word.length() == 1) { + if (punctuation.indexOf(word.charAt(0)) > -1) { + line += word; + word = ""; + } + } + result.add(line); + line = ""; + } + line += word; + boundaryStart = boundaryEnd; + boundaryEnd = boundary.next(); + } + if (line.length() > 0) + result.add(line); + } + + return result.toArray(new String[result.size()]); + } + + /** + * Creates a new instance of an object given it's class name and (optional) arguments to pass to it's setOptions + * method. If the object implements OptionHandler and the options parameter is non-null, the object will have it's + * options set. Example use: + * <p> + * + * <code> <pre> + * String classifierName = Utils.getOption('W', options); + * Classifier c = (Classifier)Utils.forName(Classifier.class, + * classifierName, + * options); + * setClassifier(c); + * </pre></code> + * + * @param classType + * the class that the instantiated object should be assignable to -- an exception is thrown if this is not + * the case + * @param className + * the fully qualified class name of the object + * @param options + * an array of options suitable for passing to setOptions. May be null. Any options accepted by the object + * will be removed from the array. + * @return the newly created object, ready for use. + * @exception Exception + * if the class name is invalid, or if the class is not assignable to the desired class type, or the + * options supplied are not acceptable to the object + */ + public static Object forName(Class<?> classType, + String className, + String[] options) throws Exception { + + Class<?> c = null; + try { + c = Class.forName(className); + } catch (Exception ex) { + throw new Exception("Can't find class called: " + className); + } + if (!classType.isAssignableFrom(c)) { + throw new Exception(classType.getName() + " is not assignable from " + + className); + } + Object o = c.newInstance(); + /* + * if ((o instanceof OptionHandler) && (options != null)) { + * ((OptionHandler)o).setOptions(options); + * Utils.checkForRemainingOptions(options); } + */ + return o; + } + +}
http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningCurve.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningCurve.java b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningCurve.java new file mode 100644 index 0000000..427e01d --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningCurve.java @@ -0,0 +1,132 @@ +package org.apache.samoa.moa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.List; + +import org.apache.samoa.moa.AbstractMOAObject; +import org.apache.samoa.moa.core.DoubleVector; +import org.apache.samoa.moa.core.Measurement; +import org.apache.samoa.moa.core.StringUtils; + +/** + * Class that stores and keeps the history of evaluation measurements. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class LearningCurve extends AbstractMOAObject { + + private static final long serialVersionUID = 1L; + + protected List<String> measurementNames = new ArrayList<String>(); + + protected List<double[]> measurementValues = new ArrayList<double[]>(); + + public LearningCurve(String orderingMeasurementName) { + this.measurementNames.add(orderingMeasurementName); + } + + public String getOrderingMeasurementName() { + return this.measurementNames.get(0); + } + + public void insertEntry(LearningEvaluation learningEvaluation) { + Measurement[] measurements = learningEvaluation.getMeasurements(); + Measurement orderMeasurement = Measurement.getMeasurementNamed( + getOrderingMeasurementName(), measurements); + if (orderMeasurement == null) { + throw new IllegalArgumentException(); + } + DoubleVector entryVals = new DoubleVector(); + for (Measurement measurement : measurements) { + entryVals.setValue(addMeasurementName(measurement.getName()), + measurement.getValue()); + } + double orderVal = orderMeasurement.getValue(); + int index = 0; + while ((index < this.measurementValues.size()) + && (orderVal > this.measurementValues.get(index)[0])) { + index++; + } + this.measurementValues.add(index, entryVals.getArrayRef()); + } + + public int numEntries() { + return this.measurementValues.size(); + } + + protected int addMeasurementName(String name) { + int index = this.measurementNames.indexOf(name); + if (index < 0) { + index = this.measurementNames.size(); + this.measurementNames.add(name); + } + return index; + } + + public String headerToString() { + StringBuilder sb = new StringBuilder(); + boolean first = true; + for (String name : this.measurementNames) { + if (!first) { + sb.append(','); + } else { + first = false; + } + sb.append(name); + } + return sb.toString(); + } + + public String entryToString(int entryIndex) { + StringBuilder sb = new StringBuilder(); + double[] vals = this.measurementValues.get(entryIndex); + for (int i = 0; i < this.measurementNames.size(); i++) { + if (i > 0) { + sb.append(','); + } + if ((i >= vals.length) || Double.isNaN(vals[i])) { + sb.append('?'); + } else { + sb.append(Double.toString(vals[i])); + } + } + return sb.toString(); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + sb.append(headerToString()); + for (int i = 0; i < numEntries(); i++) { + StringUtils.appendNewlineIndented(sb, indent, entryToString(i)); + } + } + + public double getMeasurement(int entryIndex, int measurementIndex) { + return this.measurementValues.get(entryIndex)[measurementIndex]; + } + + public String getMeasurementName(int measurementIndex) { + return this.measurementNames.get(measurementIndex); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningEvaluation.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningEvaluation.java b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningEvaluation.java new file mode 100644 index 0000000..993667d --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningEvaluation.java @@ -0,0 +1,64 @@ +package org.apache.samoa.moa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.Arrays; +import java.util.LinkedList; +import java.util.List; + +import org.apache.samoa.moa.AbstractMOAObject; +import org.apache.samoa.moa.core.Measurement; +import org.apache.samoa.moa.learners.Learner; + +/** + * Class that stores an array of evaluation measurements. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public class LearningEvaluation extends AbstractMOAObject { + + private static final long serialVersionUID = 1L; + + protected Measurement[] measurements; + + public LearningEvaluation(Measurement[] measurements) { + this.measurements = measurements.clone(); + } + + public LearningEvaluation(Measurement[] evaluationMeasurements, + LearningPerformanceEvaluator cpe, Learner model) { + List<Measurement> measurementList = new LinkedList<Measurement>(); + measurementList.addAll(Arrays.asList(evaluationMeasurements)); + measurementList.addAll(Arrays.asList(cpe.getPerformanceMeasurements())); + measurementList.addAll(Arrays.asList(model.getModelMeasurements())); + this.measurements = measurementList.toArray(new Measurement[measurementList.size()]); + } + + public Measurement[] getMeasurements() { + return this.measurements.clone(); + } + + @Override + public void getDescription(StringBuilder sb, int indent) { + Measurement.getMeasurementsDescription(this.measurements, sb, indent); + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningPerformanceEvaluator.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningPerformanceEvaluator.java b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningPerformanceEvaluator.java new file mode 100644 index 0000000..7a0a8af --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/LearningPerformanceEvaluator.java @@ -0,0 +1,59 @@ +package org.apache.samoa.moa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.moa.MOAObject; +import org.apache.samoa.moa.core.Example; +import org.apache.samoa.moa.core.Measurement; + +/** + * Interface implemented by learner evaluators to monitor the results of the learning process. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public interface LearningPerformanceEvaluator<E extends Example> extends MOAObject { + + /** + * Resets this evaluator. It must be similar to starting a new evaluator from scratch. + * + */ + public void reset(); + + /** + * Adds a learning result to this evaluator. + * + * @param example + * the example to be classified + * @param classVotes + * an array containing the estimated membership probabilities of the test instance in each class + * @return an array of measurements monitored in this evaluator + */ + public void addResult(E example, double[] classVotes); + + /** + * Gets the current measurements monitored by this evaluator. + * + * @return an array of measurements monitored by this evaluator + */ + public Measurement[] getPerformanceMeasurements(); + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/MeasureCollection.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/MeasureCollection.java b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/MeasureCollection.java new file mode 100644 index 0000000..42e6d49 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/MeasureCollection.java @@ -0,0 +1,262 @@ +package org.apache.samoa.moa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.HashMap; + +import org.apache.samoa.moa.AbstractMOAObject; +import org.apache.samoa.moa.cluster.Clustering; +import org.apache.samoa.moa.core.DataPoint; + +public abstract class MeasureCollection extends AbstractMOAObject { + private String[] names; + private ArrayList<Double>[] values; + private ArrayList<Double>[] sortedValues; + private ArrayList<String> events; + + private double[] minValue; + private double[] maxValue; + private double[] sumValues; + private boolean[] enabled; + private boolean[] corrupted; + private double time; + private boolean debug = true; + private MembershipMatrix mm = null; + + private HashMap<String, Integer> map; + + private int numMeasures = 0; + + public MeasureCollection() { + names = getNames(); + numMeasures = names.length; + map = new HashMap<String, Integer>(numMeasures); + for (int i = 0; i < names.length; i++) { + map.put(names[i], i); + } + values = (ArrayList<Double>[]) new ArrayList[numMeasures]; + sortedValues = (ArrayList<Double>[]) new ArrayList[numMeasures]; + maxValue = new double[numMeasures]; + minValue = new double[numMeasures]; + sumValues = new double[numMeasures]; + corrupted = new boolean[numMeasures]; + enabled = getDefaultEnabled(); + time = 0; + events = new ArrayList<String>(); + + for (int i = 0; i < numMeasures; i++) { + values[i] = new ArrayList<Double>(); + sortedValues[i] = new ArrayList<Double>(); + maxValue[i] = Double.MIN_VALUE; + minValue[i] = Double.MAX_VALUE; + corrupted[i] = false; + sumValues[i] = 0.0; + } + + } + + protected abstract String[] getNames(); + + public void addValue(int index, double value) { + if (Double.isNaN(value)) { + if (debug) + System.out.println("NaN for " + names[index]); + corrupted[index] = true; + } + if (value < 0) { + if (debug) + System.out.println("Negative value for " + names[index]); + } + + values[index].add(value); + sumValues[index] += value; + if (value < minValue[index]) + minValue[index] = value; + if (value > maxValue[index]) + maxValue[index] = value; + } + + protected void addValue(String name, double value) { + if (map.containsKey(name)) { + addValue(map.get(name), value); + } + else { + System.out.println(name + " is not a valid measure key, no value added"); + } + } + + // add an empty entry e.g. if evaluation crashed internally + public void addEmptyValue(int index) { + values[index].add(Double.NaN); + corrupted[index] = true; + } + + public int getNumMeasures() { + return numMeasures; + } + + public String getName(int index) { + return names[index]; + } + + public double getMaxValue(int index) { + return maxValue[index]; + } + + public double getMinValue(int index) { + return minValue[index]; + } + + public double getLastValue(int index) { + if (values[index].size() < 1) + return Double.NaN; + return values[index].get(values[index].size() - 1); + } + + public double getMean(int index) { + if (corrupted[index] || values[index].size() < 1) + return Double.NaN; + + return sumValues[index] / values[index].size(); + } + + private void updateSortedValues(int index) { + // naive implementation of insertion sort + for (int i = sortedValues[index].size(); i < values[index].size(); i++) { + double v = values[index].get(i); + int insertIndex = 0; + while (!sortedValues[index].isEmpty() && insertIndex < sortedValues[index].size() + && v > sortedValues[index].get(insertIndex)) + insertIndex++; + sortedValues[index].add(insertIndex, v); + } + // for (int i = 0; i < sortedValues[index].size(); i++) { + // System.out.print(sortedValues[index].get(i)+" "); + // } + // System.out.println(); + } + + public void clean(int index) { + sortedValues[index].clear(); + } + + public double getMedian(int index) { + updateSortedValues(index); + int size = sortedValues[index].size(); + + if (size > 0) { + if (size % 2 == 1) + return sortedValues[index].get((int) (size / 2)); + else + return (sortedValues[index].get((size - 1) / 2) + sortedValues[index].get((size - 1) / 2 + 1)) / 2.0; + } + return Double.NaN; + } + + public double getLowerQuartile(int index) { + updateSortedValues(index); + int size = sortedValues[index].size(); + if (size > 11) { + return sortedValues[index].get(Math.round(size * 0.25f)); + } + return Double.NaN; + } + + public double getUpperQuartile(int index) { + updateSortedValues(index); + int size = sortedValues[index].size(); + if (size > 11) { + return sortedValues[index].get(Math.round(size * 0.75f - 1)); + } + return Double.NaN; + } + + public int getNumberOfValues(int index) { + return values[index].size(); + } + + public double getValue(int index, int i) { + if (i >= values[index].size()) + return Double.NaN; + return values[index].get(i); + } + + public ArrayList<Double> getAllValues(int index) { + return values[index]; + } + + public void setEnabled(int index, boolean value) { + enabled[index] = value; + } + + public boolean isEnabled(int index) { + return enabled[index]; + } + + public double getMeanRunningTime() { + if (values[0].size() != 0) + return (time / 10e5 / values[0].size()); + else + return 0; + } + + protected boolean[] getDefaultEnabled() { + boolean[] defaults = new boolean[numMeasures]; + for (int i = 0; i < defaults.length; i++) { + defaults[i] = true; + } + return defaults; + } + + protected abstract void evaluateClustering(Clustering clustering, Clustering trueClustering, + ArrayList<DataPoint> points) throws Exception; + + /* + * Evaluate Clustering + * + * return Time in milliseconds + */ + public double evaluateClusteringPerformance(Clustering clustering, Clustering trueClustering, + ArrayList<DataPoint> points) throws Exception { + long start = System.nanoTime(); + evaluateClustering(clustering, trueClustering, points); + long duration = System.nanoTime() - start; + time += duration; + duration /= 10e5; + return duration; + } + + public void getDescription(StringBuilder sb, int indent) { + + } + + public void addEventType(String type) { + events.add(type); + } + + public String getEventType(int index) { + if (index < events.size()) + return events.get(index); + else + return null; + } +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/MembershipMatrix.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/MembershipMatrix.java b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/MembershipMatrix.java new file mode 100644 index 0000000..4387c04 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/evaluation/MembershipMatrix.java @@ -0,0 +1,149 @@ +package org.apache.samoa.moa.evaluation; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import java.util.ArrayList; +import java.util.HashMap; + +import org.apache.samoa.moa.cluster.Clustering; +import org.apache.samoa.moa.core.DataPoint; + +public class MembershipMatrix { + + HashMap<Integer, Integer> classmap; + int cluster_class_weights[][]; + int cluster_sums[]; + int class_sums[]; + int total_entries; + int class_distribution[]; + int total_class_entries; + int initalBuildTimestamp = -1; + + public MembershipMatrix(Clustering foundClustering, ArrayList<DataPoint> points) { + classmap = Clustering.classValues(points); + // int lastID = classmap.size()-1; + // classmap.put(-1, lastID); + int numClasses = classmap.size(); + int numCluster = foundClustering.size() + 1; + + cluster_class_weights = new int[numCluster][numClasses]; + class_distribution = new int[numClasses]; + cluster_sums = new int[numCluster]; + class_sums = new int[numClasses]; + total_entries = 0; + total_class_entries = points.size(); + for (int p = 0; p < points.size(); p++) { + int worklabel = classmap.get((int) points.get(p).classValue()); + // real class distribution + class_distribution[worklabel]++; + boolean covered = false; + for (int c = 0; c < numCluster - 1; c++) { + double prob = foundClustering.get(c).getInclusionProbability(points.get(p)); + if (prob >= 1) { + cluster_class_weights[c][worklabel]++; + class_sums[worklabel]++; + cluster_sums[c]++; + total_entries++; + covered = true; + } + } + if (!covered) { + cluster_class_weights[numCluster - 1][worklabel]++; + class_sums[worklabel]++; + cluster_sums[numCluster - 1]++; + total_entries++; + } + + } + + initalBuildTimestamp = points.get(0).getTimestamp(); + } + + public int getClusterClassWeight(int i, int j) { + return cluster_class_weights[i][j]; + } + + public int getClusterSum(int i) { + return cluster_sums[i]; + } + + public int getClassSum(int j) { + return class_sums[j]; + } + + public int getClassDistribution(int j) { + return class_distribution[j]; + } + + public int getClusterClassWeightByLabel(int cluster, int classLabel) { + return cluster_class_weights[cluster][classmap.get(classLabel)]; + } + + public int getClassSumByLabel(int classLabel) { + return class_sums[classmap.get(classLabel)]; + } + + public int getClassDistributionByLabel(int classLabel) { + return class_distribution[classmap.get(classLabel)]; + } + + public int getTotalEntries() { + return total_entries; + } + + public int getNumClasses() { + return classmap.size(); + } + + public boolean hasNoiseClass() { + return classmap.containsKey(-1); + } + + @Override + public String toString() { + StringBuffer sb = new StringBuffer(); + sb.append("Membership Matrix\n"); + for (int i = 0; i < cluster_class_weights.length; i++) { + for (int j = 0; j < cluster_class_weights[i].length; j++) { + sb.append(cluster_class_weights[i][j] + "\t "); + } + sb.append("| " + cluster_sums[i] + "\n"); + } + // sb.append("-----------\n"); + for (int i = 0; i < class_sums.length; i++) { + sb.append(class_sums[i] + "\t "); + } + sb.append("| " + total_entries + "\n"); + + sb.append("Real class distribution \n"); + for (int i = 0; i < class_distribution.length; i++) { + sb.append(class_distribution[i] + "\t "); + } + sb.append("| " + total_class_entries + "\n"); + + return sb.toString(); + } + + public int getInitalBuildTimestamp() { + return initalBuildTimestamp; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-samoa/blob/9b178f63/samoa-api/src/main/java/org/apache/samoa/moa/learners/Learner.java ---------------------------------------------------------------------- diff --git a/samoa-api/src/main/java/org/apache/samoa/moa/learners/Learner.java b/samoa-api/src/main/java/org/apache/samoa/moa/learners/Learner.java new file mode 100644 index 0000000..8905ea5 --- /dev/null +++ b/samoa-api/src/main/java/org/apache/samoa/moa/learners/Learner.java @@ -0,0 +1,130 @@ +package org.apache.samoa.moa.learners; + +/* + * #%L + * SAMOA + * %% + * Copyright (C) 2014 - 2015 Apache Software Foundation + * %% + * Licensed under the Apache License, Version 2.0 (the "License"); + * you may not use this file except in compliance with the License. + * You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + * #L% + */ + +import org.apache.samoa.instances.InstancesHeader; +import org.apache.samoa.moa.MOAObject; +import org.apache.samoa.moa.core.Example; +import org.apache.samoa.moa.core.Measurement; +import org.apache.samoa.moa.options.OptionHandler; + +/** + * Learner interface for incremental learning models. + * + * @author Richard Kirkby ([email protected]) + * @version $Revision: 7 $ + */ +public interface Learner<E extends Example> extends MOAObject, OptionHandler { + + /** + * Gets whether this learner needs a random seed. Examples of methods that needs a random seed are bagging and + * boosting. + * + * @return true if the learner needs a random seed. + */ + public boolean isRandomizable(); + + /** + * Sets the seed for random number generation. + * + * @param s + * the seed + */ + public void setRandomSeed(int s); + + /** + * Gets whether training has started. + * + * @return true if training has started + */ + public boolean trainingHasStarted(); + + /** + * Gets the sum of the weights of the instances that have been used by this learner during the training in + * <code>trainOnInstance</code> + * + * @return the weight of the instances that have been used training + */ + public double trainingWeightSeenByModel(); + + /** + * Resets this learner. It must be similar to starting a new learner from scratch. + * + */ + public void resetLearning(); + + /** + * Trains this learner incrementally using the given example. + * + * @param inst + * the instance to be used for training + */ + public void trainOnInstance(E example); + + /** + * Predicts the class memberships for a given instance. If an instance is unclassified, the returned array elements + * must be all zero. + * + * @param inst + * the instance to be classified + * @return an array containing the estimated membership probabilities of the test instance in each class + */ + public double[] getVotesForInstance(E example); + + /** + * Gets the current measurements of this learner. + * + * @return an array of measurements to be used in evaluation tasks + */ + public Measurement[] getModelMeasurements(); + + /** + * Gets the learners of this ensemble. Returns null if this learner is a single learner. + * + * @return an array of the learners of the ensemble + */ + public Learner[] getSublearners(); + + /** + * Gets the model if this learner. + * + * @return the copy of this learner + */ + public MOAObject getModel(); + + /** + * Sets the reference to the header of the data stream. The header of the data stream is extended from WEKA + * <code>Instances</code>. This header is needed to know the number of classes and attributes + * + * @param ih + * the reference to the data stream header + */ + public void setModelContext(InstancesHeader ih); + + /** + * Gets the reference to the header of the data stream. The header of the data stream is extended from WEKA + * <code>Instances</code>. This header is needed to know the number of classes and attributes + * + * @return the reference to the data stream header + */ + public InstancesHeader getModelContext(); + +}
