[ https://issues.apache.org/jira/browse/LUCENE-530?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel#action_12503769 ]
mohammad norouzi commented on LUCENE-530: ----------------------------------------- I just want to know: if this works fine, why don't they add it to Lucene's API? Now I need this in my search engine program. > Extend NumberTools to support int/long/float/double to string > ------------------------------------------------------------- > > Key: LUCENE-530 > URL: https://issues.apache.org/jira/browse/LUCENE-530 > Project: Lucene - Java > Issue Type: Improvement > Components: Analysis > Affects Versions: 1.9 > Reporter: Andy Hind > Priority: Minor > > Extend NumberTools to support int/long/float/double to string > So you can search using range queries on int/long/float/double, if you want. > Here is the basis for how NumberTools could be extended to support > int/long/double/float. > As I only write these values to the index and fix tokenisation in searches, I > was not so fussed about the reverse transformations back to Strings. > public class NumericEncoder > { > /* > * Constants for integer encoding > */ > static int INTEGER_SIGN_MASK = 0x80000000; > /* > * Constants for long encoding > */ > static long LONG_SIGN_MASK = 0x8000000000000000L; > /* > * Constants for float encoding > */ > static int FLOAT_SIGN_MASK = 0x80000000; > static int FLOAT_EXPONENT_MASK = 0x7F800000; > static int FLOAT_MANTISSA_MASK = 0x007FFFFF; > /* > * Constants for double encoding > */ > static long DOUBLE_SIGN_MASK = 0x8000000000000000L; > static long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L; > static long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL; > private NumericEncoder() > { > super(); > } > /** > * Encode an integer into a string that orders correctly using string > * comparison Integer.MIN_VALUE encodes as 00000000 and MAX_VALUE as > * ffffffff. 
> * > * @param intToEncode > * @return > */ > public static String encode(int intToEncode) > { > int replacement = intToEncode ^ INTEGER_SIGN_MASK; > return encodeToHex(replacement); > } > /** > * Encode a long into a string that orders correctly using string > comparison > * Long.MIN_VALUE encodes as 0000000000000000 and MAX_VALUE as > * ffffffffffffffff. > * > * @param longToEncode > * @return > */ > public static String encode(long longToEncode) > { > long replacement = longToEncode ^ LONG_SIGN_MASK; > return encodeToHex(replacement); > } > /** > * Encode a float into a string that orders correctly according to string > * comparison. Note that there is no negative NaN but there are codings > that > * imply this. So NaN and -Infinity may not compare as expected. > * > * @param floatToEncode > * @return > */ > public static String encode(float floatToEncode) > { > int bits = Float.floatToIntBits(floatToEncode); > int sign = bits & FLOAT_SIGN_MASK; > int exponent = bits & FLOAT_EXPONENT_MASK; > int mantissa = bits & FLOAT_MANTISSA_MASK; > if (sign != 0) > { > exponent ^= FLOAT_EXPONENT_MASK; > mantissa ^= FLOAT_MANTISSA_MASK; > } > sign ^= FLOAT_SIGN_MASK; > int replacement = sign | exponent | mantissa; > return encodeToHex(replacement); > } > /** > * Encode a double into a string that orders correctly according to string > * comparison. Note that there is no negative NaN but there are codings > that > * imply this. So NaN and -Infinity may not compare as expected. 
> * > * @param doubleToEncode > * @return > */ > public static String encode(double doubleToEncode) > { > long bits = Double.doubleToLongBits(doubleToEncode); > long sign = bits & DOUBLE_SIGN_MASK; > long exponent = bits & DOUBLE_EXPONENT_MASK; > long mantissa = bits & DOUBLE_MANTISSA_MASK; > if (sign != 0) > { > exponent ^= DOUBLE_EXPONENT_MASK; > mantissa ^= DOUBLE_MANTISSA_MASK; > } > sign ^= DOUBLE_SIGN_MASK; > long replacement = sign | exponent | mantissa; > return encodeToHex(replacement); > } > private static String encodeToHex(int i) > { > char[] buf = new char[] { '0', '0', '0', '0', '0', '0', '0', '0' }; > int charPos = 8; > do > { > buf[--charPos] = DIGITS[i & MASK]; > i >>>= 4; > } > while (i != 0); > return new String(buf); > } > private static String encodeToHex(long l) > { > char[] buf = new char[] { '0', '0', '0', '0', '0', '0', '0', '0', > '0', '0', '0', '0', '0', '0', '0', '0' }; > int charPos = 16; > do > { > buf[--charPos] = DIGITS[(int) l & MASK]; > l >>>= 4; > } > while (l != 0); > return new String(buf); > } > private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', > '7', '8', '9', 'a', 'b', 'c', 'd', 'e', > 'f' }; > private static final int MASK = (1 << 4) - 1; > } > public class NumericEncodingTest extends TestCase > { > public NumericEncodingTest() > { > super(); > } > public NumericEncodingTest(String arg0) > { > super(arg0); > } > /** > * Do an exhaustive test for integers > * > */ > public void xtestAllIntegerEncodings() > { > String lastString = null; > String nextString = null; > for (long i = Integer.MIN_VALUE; i <= Integer.MAX_VALUE; i++) > { > nextString = NumericEncoder.encode((int) i); > if (lastString != null) > { > assertFalse(lastString.compareTo(nextString) > 0); > } > lastString = nextString; > } > } > /** > * Do an exhaustive test for float > * > */ > public void xtestAllFloatEncodings() > { > Float last = null; > Float next = null; > String lastString = null; > String nextString = null; > for (int sign = 
1; sign >= 0; sign--) > { > if (sign == 0) > { > for (int exponent = 0; exponent <= 0xFF; exponent++) > { > for (int mantissa = 0; mantissa <= 0x007FFFFF; mantissa++) > { > int bitPattern = sign << 31 | exponent << 23 | > mantissa; > next = Float.intBitsToFloat(bitPattern); > if (!next.equals(Float.NaN) && (last != null) && > (last.compareTo(next) > 0)) > { > System.err.println(last + " > " + next); > } > if (!next.equals(Float.NaN)) > { > nextString = NumericEncoder.encode(next); > if ((lastString != null) && > (lastString.compareTo(nextString) > 0)) > { > System.err.println(lastString + " > " + > nextString); > } > lastString = nextString; > } > last = next; > } > } > } > else > { > for (int exponent = 0xFF; exponent >= 0; exponent--) > { > for (int mantissa = 0x007FFFFF; mantissa >= 0; mantissa--) > { > int bitPattern = sign << 31 | exponent << 23 | > mantissa; > next = Float.intBitsToFloat(bitPattern); > if (!next.equals(Float.NaN) && (last != null) && > (last.compareTo(next) > 0)) > { > System.err.println(last + " > " + next); > } > if (!next.equals(Float.NaN)) > { > nextString = NumericEncoder.encode(next); > if ((lastString != null) && > (lastString.compareTo(nextString) > 0)) > { > System.err.println(lastString + " > " + > nextString); > } > lastString = nextString; > } > last = next; > } > } > } > } > } > /* > * Sample test for int > */ > public void testIntegerEncoding() > { > assertEquals("00000000", NumericEncoder.encode(Integer.MIN_VALUE)); > assertEquals("00000001", NumericEncoder.encode(Integer.MIN_VALUE + > 1)); > assertEquals("7fffffff", NumericEncoder.encode(-1)); > assertEquals("80000000", NumericEncoder.encode(0)); > assertEquals("80000001", NumericEncoder.encode(1)); > assertEquals("fffffffe", NumericEncoder.encode(Integer.MAX_VALUE - > 1)); > assertEquals("ffffffff", NumericEncoder.encode(Integer.MAX_VALUE)); > } > /* > * Sample test for long > */ > public void testLongEncoding() > { > assertEquals("0000000000000000", > 
NumericEncoder.encode(Long.MIN_VALUE)); > assertEquals("0000000000000001", NumericEncoder.encode(Long.MIN_VALUE > + 1)); > assertEquals("7fffffffffffffff", NumericEncoder.encode(-1L)); > assertEquals("8000000000000000", NumericEncoder.encode(0L)); > assertEquals("8000000000000001", NumericEncoder.encode(1L)); > assertEquals("fffffffffffffffe", NumericEncoder.encode(Long.MAX_VALUE > - 1)); > assertEquals("ffffffffffffffff", > NumericEncoder.encode(Long.MAX_VALUE)); > } > /* > * Sample test for float > */ > public void testFloatEncoding() > { > assertEquals("007fffff", > NumericEncoder.encode(Float.NEGATIVE_INFINITY)); > assertEquals("00800000", NumericEncoder.encode(-Float.MAX_VALUE)); > assertEquals("7ffffffe", NumericEncoder.encode(-Float.MIN_VALUE)); > assertEquals("7fffffff", NumericEncoder.encode(-0f)); > assertEquals("80000000", NumericEncoder.encode(0f)); > assertEquals("80000001", NumericEncoder.encode(Float.MIN_VALUE)); > assertEquals("ff7fffff", NumericEncoder.encode(Float.MAX_VALUE)); > assertEquals("ff800000", > NumericEncoder.encode(Float.POSITIVE_INFINITY)); > assertEquals("ffc00000", NumericEncoder.encode(Float.NaN)); > } > /* > * Sample test for double > */ > public void testDoubleEncoding() > { > assertEquals("000fffffffffffff", > NumericEncoder.encode(Double.NEGATIVE_INFINITY)); > assertEquals("0010000000000000", > NumericEncoder.encode(-Double.MAX_VALUE)); > assertEquals("7ffffffffffffffe", > NumericEncoder.encode(-Double.MIN_VALUE)); > assertEquals("7fffffffffffffff", NumericEncoder.encode(-0d)); > assertEquals("8000000000000000", NumericEncoder.encode(0d)); > assertEquals("8000000000000001", > NumericEncoder.encode(Double.MIN_VALUE)); > assertEquals("ffefffffffffffff", > NumericEncoder.encode(Double.MAX_VALUE)); > assertEquals("fff0000000000000", > NumericEncoder.encode(Double.POSITIVE_INFINITY)); > assertEquals("fff8000000000000", NumericEncoder.encode(Double.NaN)); > } > } -- This message is automatically generated by JIRA. 
- You can reply to this email to add a comment to the issue online. --------------------------------------------------------------------- To unsubscribe, e-mail: [EMAIL PROTECTED] For additional commands, e-mail: [EMAIL PROTECTED]