Extend NumberTools to support int/long/float/double to string 
--------------------------------------------------------------

         Key: LUCENE-530
         URL: http://issues.apache.org/jira/browse/LUCENE-530
     Project: Lucene - Java
        Type: Improvement
  Components: Analysis  
    Versions: 1.9    
    Reporter: Andy Hind
    Priority: Minor


Extend NumberTools to support int/long/float/double to string 

So you can search using range queries on int/long/float/double, if you want.

Here is the basis for how NumberTools could be extended to support 
int/long/double/float.
As I only write these values to the index and fix tokenisation in searches, I was 
not so fussed about the reverse transformations back to Strings.



/**
 * Encodes int, long, float and double values as fixed-width, lower-case
 * hexadecimal strings whose {@link String#compareTo(String)} order follows the
 * numeric order of the encoded values.
 * <p>
 * int and float values produce 8 characters; long and double values produce
 * 16 characters. Only the encoding direction is provided.
 */
public final class NumericEncoder
{
    /*
     * Constants for integer encoding
     */

    /** Sign bit of a 32-bit int. */
    static final int INTEGER_SIGN_MASK = 0x80000000;

    /*
     * Constants for long encoding
     */

    /** Sign bit of a 64-bit long. */
    static final long LONG_SIGN_MASK = 0x8000000000000000L;

    /*
     * Constants for float encoding
     */

    /** Sign bit of the int bit pattern of a float. */
    static final int FLOAT_SIGN_MASK = 0x80000000;

    /** Exponent bits of the int bit pattern of a float. */
    static final int FLOAT_EXPONENT_MASK = 0x7F800000;

    /** Mantissa bits of the int bit pattern of a float. */
    static final int FLOAT_MANTISSA_MASK = 0x007FFFFF;

    /*
     * Constants for double encoding
     */

    /** Sign bit of the long bit pattern of a double. */
    static final long DOUBLE_SIGN_MASK = 0x8000000000000000L;

    /** Exponent bits of the long bit pattern of a double. */
    static final long DOUBLE_EXPONENT_MASK = 0x7FF0000000000000L;

    /** Mantissa bits of the long bit pattern of a double. */
    static final long DOUBLE_MANTISSA_MASK = 0x000FFFFFFFFFFFFFL;

    /*
     * Constants for hex formatting
     */

    /** Hex digits indexed by nibble value. */
    private static final char[] DIGITS = { '0', '1', '2', '3', '4', '5', '6', '7', '8', '9', 'a', 'b', 'c', 'd',
            'e', 'f' };

    /** Selects the low four bits (one hex digit). */
    private static final int MASK = (1 << 4) - 1;

    /** Number of hex digits in an encoded int or float. */
    private static final int INT_HEX_LENGTH = 8;

    /** Number of hex digits in an encoded long or double. */
    private static final int LONG_HEX_LENGTH = 16;

    /**
     * Static utility class; not instantiable.
     */
    private NumericEncoder()
    {
    }

    /**
     * Encode an integer into a string that orders correctly using string
     * comparison. Integer.MIN_VALUE encodes as 00000000 and Integer.MAX_VALUE
     * as ffffffff.
     * 
     * @param intToEncode
     *            the value to encode
     * @return the 8 character lower-case hex encoding
     */
    public static String encode(int intToEncode)
    {
        // Flipping the sign bit makes negative values sort below positive ones
        // when the result is read as an unsigned number.
        int replacement = intToEncode ^ INTEGER_SIGN_MASK;
        return encodeToHex(replacement);
    }

    /**
     * Encode a long into a string that orders correctly using string
     * comparison. Long.MIN_VALUE encodes as 0000000000000000 and
     * Long.MAX_VALUE as ffffffffffffffff.
     * 
     * @param longToEncode
     *            the value to encode
     * @return the 16 character lower-case hex encoding
     */
    public static String encode(long longToEncode)
    {
        // Same sign-bit flip as for int, on 64 bits.
        long replacement = longToEncode ^ LONG_SIGN_MASK;
        return encodeToHex(replacement);
    }

    /**
     * Encode a float into a string that orders correctly according to string
     * comparison.
     * <p>
     * -0f encodes as 7fffffff, directly below 0f (80000000). The bit pattern
     * is obtained with {@link Float#floatToIntBits(float)}, so every NaN is
     * encoded as the single value ffc00000, which sorts after
     * Float.POSITIVE_INFINITY (ff800000).
     * 
     * @param floatToEncode
     *            the value to encode
     * @return the 8 character lower-case hex encoding
     */
    public static String encode(float floatToEncode)
    {
        int bits = Float.floatToIntBits(floatToEncode);
        int sign = bits & FLOAT_SIGN_MASK;
        int exponent = bits & FLOAT_EXPONENT_MASK;
        int mantissa = bits & FLOAT_MANTISSA_MASK;
        if (sign != 0)
        {
            // For negative values a larger magnitude means a smaller number,
            // so invert the magnitude bits to reverse their order.
            exponent ^= FLOAT_EXPONENT_MASK;
            mantissa ^= FLOAT_MANTISSA_MASK;
        }
        // Flip the sign bit so negatives sort below positives.
        sign ^= FLOAT_SIGN_MASK;
        int replacement = sign | exponent | mantissa;
        return encodeToHex(replacement);
    }

    /**
     * Encode a double into a string that orders correctly according to string
     * comparison.
     * <p>
     * -0d encodes as 7fffffffffffffff, directly below 0d (8000000000000000).
     * The bit pattern is obtained with {@link Double#doubleToLongBits(double)},
     * so every NaN is encoded as the single value fff8000000000000, which
     * sorts after Double.POSITIVE_INFINITY (fff0000000000000).
     * 
     * @param doubleToEncode
     *            the value to encode
     * @return the 16 character lower-case hex encoding
     */
    public static String encode(double doubleToEncode)
    {
        long bits = Double.doubleToLongBits(doubleToEncode);
        long sign = bits & DOUBLE_SIGN_MASK;
        long exponent = bits & DOUBLE_EXPONENT_MASK;
        long mantissa = bits & DOUBLE_MANTISSA_MASK;
        if (sign != 0)
        {
            // For negative values a larger magnitude means a smaller number,
            // so invert the magnitude bits to reverse their order.
            exponent ^= DOUBLE_EXPONENT_MASK;
            mantissa ^= DOUBLE_MANTISSA_MASK;
        }
        // Flip the sign bit so negatives sort below positives.
        sign ^= DOUBLE_SIGN_MASK;
        long replacement = sign | exponent | mantissa;
        return encodeToHex(replacement);
    }

    /**
     * Format an int as exactly 8 lower-case hex digits, treating it as an
     * unsigned value and padding with leading zeros.
     * 
     * @param i
     *            the bits to format
     * @return the zero-padded hex string
     */
    private static String encodeToHex(int i)
    {
        char[] buf = new char[INT_HEX_LENGTH];
        // Fill from the least significant nibble backwards.
        for (int charPos = INT_HEX_LENGTH - 1; charPos >= 0; charPos--)
        {
            buf[charPos] = DIGITS[i & MASK];
            i >>>= 4;
        }
        return new String(buf);
    }

    /**
     * Format a long as exactly 16 lower-case hex digits, treating it as an
     * unsigned value and padding with leading zeros.
     * 
     * @param l
     *            the bits to format
     * @return the zero-padded hex string
     */
    private static String encodeToHex(long l)
    {
        char[] buf = new char[LONG_HEX_LENGTH];
        // Fill from the least significant nibble backwards.
        for (int charPos = LONG_HEX_LENGTH - 1; charPos >= 0; charPos--)
        {
            buf[charPos] = DIGITS[(int) (l & MASK)];
            l >>>= 4;
        }
        return new String(buf);
    }
}
























/**
 * Tests for NumericEncoder: fixed sample encodings for each supported type
 * plus two exhaustive ordering checks.
 * <p>
 * The exhaustive checks are named xtest... rather than test...; presumably
 * this keeps them out of the normal JUnit run because they walk every 32-bit
 * value (confirm against the runner in use).
 */
public class NumericEncodingTest extends TestCase
{

    public NumericEncodingTest()
    {
        super();
    }

    public NumericEncodingTest(String arg0)
    {
        super(arg0);
    }

    /**
     * Do an exhaustive test for integers: walking every int in ascending
     * order, each encoding must not sort before the previous one.
     */
    public void xtestAllIntegerEncodings()
    {
        String lastString = null;
        String nextString = null;
        // A long counter is used so the loop can terminate after
        // Integer.MAX_VALUE instead of wrapping around.
        for (long i = Integer.MIN_VALUE; i <= Integer.MAX_VALUE; i++)
        {
            nextString = NumericEncoder.encode((int) i);
            if (lastString != null)
            {
                assertFalse(lastString.compareTo(nextString) > 0);
            }
            lastString = nextString;
        }
    }

    /**
     * Do an exhaustive test for float: walking every non-NaN float bit
     * pattern in ascending numeric order, neither the values nor their
     * encodings may go backwards. Any violation is reported on System.err and
     * fails the test.
     */
    public void xtestAllFloatEncodings()
    {
        boolean havePrevious = false;
        float last = 0f;
        String lastString = null;

        // Negative patterns first (sign == 1), then positive ones (sign == 0).
        for (int sign = 1; sign >= 0; sign--)
        {
            for (long step = 0; step <= 0x7FFFFFFFL; step++)
            {
                // Negative floats grow as their exponent/mantissa bits shrink,
                // so those bits are walked downwards for sign == 1 and
                // upwards for sign == 0.
                int magnitude = (int) (sign == 1 ? 0x7FFFFFFFL - step : step);
                int bitPattern = sign << 31 | magnitude;
                float next = Float.intBitsToFloat(bitPattern);

                // NaN patterns are skipped entirely and must not become the
                // "previous" value, otherwise the first real value after them
                // would be reported as out of order.
                if (Float.isNaN(next))
                {
                    continue;
                }

                String nextString = NumericEncoder.encode(next);
                if (havePrevious)
                {
                    boolean valuesOutOfOrder = Float.compare(last, next) > 0;
                    if (valuesOutOfOrder)
                    {
                        System.err.println(last + " > " + next);
                    }
                    assertFalse(valuesOutOfOrder);

                    boolean stringsOutOfOrder = lastString.compareTo(nextString) > 0;
                    if (stringsOutOfOrder)
                    {
                        System.err.println(lastString + " > " + nextString);
                    }
                    assertFalse(stringsOutOfOrder);
                }

                last = next;
                lastString = nextString;
                havePrevious = true;
            }
        }
    }

    /*
     * Sample test for int
     */

    public void testIntegerEncoding()
    {
        assertEquals("00000000", NumericEncoder.encode(Integer.MIN_VALUE));
        assertEquals("00000001", NumericEncoder.encode(Integer.MIN_VALUE + 1));
        assertEquals("7fffffff", NumericEncoder.encode(-1));
        assertEquals("80000000", NumericEncoder.encode(0));
        assertEquals("80000001", NumericEncoder.encode(1));
        assertEquals("fffffffe", NumericEncoder.encode(Integer.MAX_VALUE - 1));
        assertEquals("ffffffff", NumericEncoder.encode(Integer.MAX_VALUE));
    }

    /*
     * Sample test for long
     */

    public void testLongEncoding()
    {
        assertEquals("0000000000000000", NumericEncoder.encode(Long.MIN_VALUE));
        assertEquals("0000000000000001", NumericEncoder.encode(Long.MIN_VALUE + 1));
        assertEquals("7fffffffffffffff", NumericEncoder.encode(-1L));
        assertEquals("8000000000000000", NumericEncoder.encode(0L));
        assertEquals("8000000000000001", NumericEncoder.encode(1L));
        assertEquals("fffffffffffffffe", NumericEncoder.encode(Long.MAX_VALUE - 1));
        assertEquals("ffffffffffffffff", NumericEncoder.encode(Long.MAX_VALUE));
    }

    /*
     * Sample test for float
     */

    public void testFloatEncoding()
    {
        assertEquals("007fffff", NumericEncoder.encode(Float.NEGATIVE_INFINITY));
        assertEquals("00800000", NumericEncoder.encode(-Float.MAX_VALUE));
        assertEquals("7ffffffe", NumericEncoder.encode(-Float.MIN_VALUE));
        assertEquals("7fffffff", NumericEncoder.encode(-0f));
        assertEquals("80000000", NumericEncoder.encode(0f));
        assertEquals("80000001", NumericEncoder.encode(Float.MIN_VALUE));
        assertEquals("ff7fffff", NumericEncoder.encode(Float.MAX_VALUE));
        assertEquals("ff800000", NumericEncoder.encode(Float.POSITIVE_INFINITY));
        assertEquals("ffc00000", NumericEncoder.encode(Float.NaN));
    }

    /*
     * Sample test for double
     */

    public void testDoubleEncoding()
    {
        assertEquals("000fffffffffffff", NumericEncoder.encode(Double.NEGATIVE_INFINITY));
        assertEquals("0010000000000000", NumericEncoder.encode(-Double.MAX_VALUE));
        assertEquals("7ffffffffffffffe", NumericEncoder.encode(-Double.MIN_VALUE));
        assertEquals("7fffffffffffffff", NumericEncoder.encode(-0d));
        assertEquals("8000000000000000", NumericEncoder.encode(0d));
        assertEquals("8000000000000001", NumericEncoder.encode(Double.MIN_VALUE));
        assertEquals("ffefffffffffffff", NumericEncoder.encode(Double.MAX_VALUE));
        assertEquals("fff0000000000000", NumericEncoder.encode(Double.POSITIVE_INFINITY));
        assertEquals("fff8000000000000", NumericEncoder.encode(Double.NaN));
    }
}


-- 
This message is automatically generated by JIRA.
-
If you think it was sent incorrectly contact one of the administrators:
   http://issues.apache.org/jira/secure/Administrators.jspa
-
For more information on JIRA, see:
   http://www.atlassian.com/software/jira


---------------------------------------------------------------------
To unsubscribe, e-mail: [EMAIL PROTECTED]
For additional commands, e-mail: [EMAIL PROTECTED]

Reply via email to