Extend NumberTools to support int/long/float/double to string --------------------------------------------------------------
Key: LUCENE-530
URL: http://issues.apache.org/jira/browse/LUCENE-530
Project: Lucene - Java
Type: Improvement
Components: Analysis
Versions: 1.9
Reporter: Andy Hind
Priority: Minor

Extend Number tools to support int/long/float/double to string

So you can search using range queries on int/long/float/double, if you want.

Here is the basis for how NumberTools could be extended to support int/long/double/float.
As I only write these values to the index and fix tokenisation in searches, I was not so fussed about the reverse transformations back to Strings.

/**
 * Encodes numeric primitives as fixed-width, lower-case hexadecimal strings
 * whose order under {@link String#compareTo(String)} matches the numeric
 * order of the encoded values, so that they can be used in string range
 * queries. Only the encoding direction is provided; there is no decoder.
 */
public final class NumericEncoder {

    /*
     * Constants for integer encoding
     */
    static final int INTEGER_SIGN_MASK = 0x80000000;

    /*
     * Constants for long encoding
     */
    static final long LONG_SIGN_MASK = 0x8000000000000000L;

    /*
     * Constants for float encoding
     */
    static final int FLOAT_SIGN_MASK = 0x80000000;

    static final int FLOAT_EXPONENT_MASK = 0x7F800000;

    static final int FLOAT_MANTISSA_MASK = 0x007FFFFF;

    /*
     * Constants for double encoding
     */
    static final long DOUBLE_SIGN_MASK = 0x8000000000000000L;

    static final long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L;

    static final long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL;

    /** Hex digits indexed by nibble value. */
    private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd',
            'e', 'f' };

    /** Selects the low four bits (one hex digit). */
    private static final int MASK = (1 << 4) - 1;

    private NumericEncoder() {
        // Static utility class: not instantiable.
    }

    /**
     * Encode an integer into a string that orders correctly using string
     * comparison. Integer.MIN_VALUE encodes as 00000000 and MAX_VALUE as
     * ffffffff.
     *
     * @param intToEncode the value to encode
     * @return an 8 character lower-case hex string
     */
    public static String encode(int intToEncode) {
        // Flipping the sign bit maps the signed range onto the unsigned range
        // in the same order.
        return encodeToHex(intToEncode ^ INTEGER_SIGN_MASK);
    }

    /**
     * Encode a long into a string that orders correctly using string
     * comparison. Long.MIN_VALUE encodes as 0000000000000000 and MAX_VALUE as
     * ffffffffffffffff.
     *
     * @param longToEncode the value to encode
     * @return a 16 character lower-case hex string
     */
    public static String encode(long longToEncode) {
        return encodeToHex(longToEncode ^ LONG_SIGN_MASK);
    }

    /**
     * Encode a float into a string that orders correctly according to string
     * comparison.
     * <p>
     * The bits come from {@link Float#floatToIntBits(float)}, so every NaN
     * encodes as ffc00000 and sorts after Float.POSITIVE_INFINITY
     * (ff800000). Float.NEGATIVE_INFINITY encodes as 007fffff; the smaller
     * codes 00000000..007ffffe would correspond to NaN bit patterns with the
     * sign bit set and are never produced. -0f (7fffffff) sorts immediately
     * before 0f (80000000).
     *
     * @param floatToEncode the value to encode
     * @return an 8 character lower-case hex string
     */
    public static String encode(float floatToEncode) {
        int bits = Float.floatToIntBits(floatToEncode);
        if ((bits & FLOAT_SIGN_MASK) != 0) {
            // Negative: invert exponent and mantissa so that larger
            // magnitudes sort first.
            bits ^= FLOAT_EXPONENT_MASK | FLOAT_MANTISSA_MASK;
        }
        // Flip the sign bit so negatives sort before positives.
        return encodeToHex(bits ^ FLOAT_SIGN_MASK);
    }

    /**
     * Encode a double into a string that orders correctly according to string
     * comparison.
     * <p>
     * The bits come from {@link Double#doubleToLongBits(double)}, so every
     * NaN encodes as fff8000000000000 and sorts after
     * Double.POSITIVE_INFINITY (fff0000000000000).
     * Double.NEGATIVE_INFINITY encodes as 000fffffffffffff; smaller codes
     * would correspond to NaN bit patterns with the sign bit set and are
     * never produced. -0d sorts immediately before 0d.
     *
     * @param doubleToEncode the value to encode
     * @return a 16 character lower-case hex string
     */
    public static String encode(double doubleToEncode) {
        long bits = Double.doubleToLongBits(doubleToEncode);
        if ((bits & DOUBLE_SIGN_MASK) != 0) {
            // Negative: invert exponent and mantissa so that larger
            // magnitudes sort first.
            bits ^= DOUBLE_EXPONENT_MASK | DOUBLE_MANTISSA_MASK;
        }
        // Flip the sign bit so negatives sort before positives.
        return encodeToHex(bits ^ DOUBLE_SIGN_MASK);
    }

    /** Formats all 32 bits of {@code i} as 8 zero-padded hex digits. */
    private static String encodeToHex(int i) {
        char[] buf = new char[8];
        for (int pos = buf.length - 1; pos >= 0; pos--) {
            buf[pos] = DIGITS[i & MASK];
            i >>>= 4;
        }
        return new String(buf);
    }

    /** Formats all 64 bits of {@code l} as 16 zero-padded hex digits. */
    private static String encodeToHex(long l) {
        char[] buf = new char[16];
        for (int pos = buf.length - 1; pos >= 0; pos--) {
            buf[pos] = DIGITS[(int) (l & MASK)];
            l >>>= 4;
        }
        return new String(buf);
    }
}

/**
 * Tests for NumericEncoder. The methods prefixed with "xtest" are exhaustive
 * and slow; the prefix keeps JUnit 3 from picking them up automatically.
 */
public class NumericEncodingTest extends TestCase {

    public NumericEncodingTest() {
        super();
    }

    public NumericEncodingTest(String arg0) {
        super(arg0);
    }

    /**
     * Do an exhaustive test for integers: walking every int in ascending
     * order must never produce a smaller encoding than the previous one.
     */
    public void xtestAllIntegerEncodings() {
        String lastString = null;
        // A long counter is required: an int counter could never exceed
        // Integer.MAX_VALUE and the loop would not terminate.
        for (long i = Integer.MIN_VALUE; i <= Integer.MAX_VALUE; i++) {
            String nextString = NumericEncoder.encode((int) i);
            if (lastString != null) {
                assertFalse(lastString.compareTo(nextString) > 0);
            }
            lastString = nextString;
        }
    }

    /**
     * Do an exhaustive test for float: visit every bit pattern in ascending
     * numeric order (sign-bit-set patterns from 0xFFFFFFFF down to
     * 0x80000000, then 0x00000000 up to 0x7FFFFFFF) and fail if either the
     * float values or their encodings ever go backwards. NaN patterns are
     * skipped entirely so they never become the "previous" value.
     */
    public void xtestAllFloatEncodings() {
        final long negativePatterns = 1L << 31;
        final long allPatterns = 1L << 32;

        boolean havePrevious = false;
        float previous = 0f;
        String previousEncoding = null;

        for (long step = 0; step < allPatterns; step++) {
            int bitPattern = step < negativePatterns
                    ? (int) (0xFFFFFFFFL - step)
                    : (int) (step - negativePatterns);
            float next = Float.intBitsToFloat(bitPattern);
            if (Float.isNaN(next)) {
                continue;
            }
            String nextEncoding = NumericEncoder.encode(next);
            if (havePrevious) {
                // Sanity check on the iteration order itself.
                if (Float.compare(previous, next) > 0) {
                    fail(previous + " > " + next);
                }
                if (previousEncoding.compareTo(nextEncoding) > 0) {
                    fail(previousEncoding + " > " + nextEncoding);
                }
            }
            previous = next;
            previousEncoding = nextEncoding;
            havePrevious = true;
        }
    }

    /*
     * Sample test for int
     */
    public void testIntegerEncoding() {
        assertEquals("00000000", NumericEncoder.encode(Integer.MIN_VALUE));
        assertEquals("00000001", NumericEncoder.encode(Integer.MIN_VALUE + 1));
        assertEquals("7fffffff", NumericEncoder.encode(-1));
        assertEquals("80000000", NumericEncoder.encode(0));
        assertEquals("80000001", NumericEncoder.encode(1));
        assertEquals("fffffffe", NumericEncoder.encode(Integer.MAX_VALUE - 1));
        assertEquals("ffffffff", NumericEncoder.encode(Integer.MAX_VALUE));
    }

    /*
     * Sample test for long
     */
    public void testLongEncoding() {
        assertEquals("0000000000000000", NumericEncoder.encode(Long.MIN_VALUE));
        assertEquals("0000000000000001", NumericEncoder.encode(Long.MIN_VALUE + 1));
        assertEquals("7fffffffffffffff", NumericEncoder.encode(-1L));
        assertEquals("8000000000000000", NumericEncoder.encode(0L));
        assertEquals("8000000000000001", NumericEncoder.encode(1L));
        assertEquals("fffffffffffffffe", NumericEncoder.encode(Long.MAX_VALUE - 1));
        assertEquals("ffffffffffffffff", NumericEncoder.encode(Long.MAX_VALUE));
    }

    /*
     * Sample test for float
     */
    public void testFloatEncoding() {
        assertEquals("007fffff", NumericEncoder.encode(Float.NEGATIVE_INFINITY));
        assertEquals("00800000", NumericEncoder.encode(-Float.MAX_VALUE));
        assertEquals("7ffffffe", NumericEncoder.encode(-Float.MIN_VALUE));
        assertEquals("7fffffff", NumericEncoder.encode(-0f));
        assertEquals("80000000", NumericEncoder.encode(0f));
        assertEquals("80000001", NumericEncoder.encode(Float.MIN_VALUE));
        assertEquals("ff7fffff", NumericEncoder.encode(Float.MAX_VALUE));
        assertEquals("ff800000", NumericEncoder.encode(Float.POSITIVE_INFINITY));
        assertEquals("ffc00000", NumericEncoder.encode(Float.NaN));
    }

    /*
     * Sample test for double
     */
    public void testDoubleEncoding() {
        assertEquals("000fffffffffffff", NumericEncoder.encode(Double.NEGATIVE_INFINITY));
        assertEquals("0010000000000000", NumericEncoder.encode(-Double.MAX_VALUE));
        assertEquals("7ffffffffffffffe", NumericEncoder.encode(-Double.MIN_VALUE));
        assertEquals("7fffffffffffffff", NumericEncoder.encode(-0d));
        assertEquals("8000000000000000", NumericEncoder.encode(0d));
        assertEquals("8000000000000001", NumericEncoder.encode(Double.MIN_VALUE));
        assertEquals("ffefffffffffffff", NumericEncoder.encode(Double.MAX_VALUE));
        assertEquals("fff0000000000000", NumericEncoder.encode(Double.POSITIVE_INFINITY));
        assertEquals("fff8000000000000", NumericEncoder.encode(Double.NaN));
    }
}

--
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
   http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see:
   http://www.atlassian.com/software/jira

---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]