Hello,

I have prepared two minor corrections for the following 2 problems.
It looks they are working fine:

> +--------+--------------------------+------------------------+
> |VALUE   |col_num                   |CAST(col_num AS NUMERIC)|
> +--------+--------+-------------+---+-----+-------------+----+
> |        |is      |IMHO shall be|ok?|is   |IMHO shall be|ok? |
> +--------+--------+-------------+---+-----+-------------+----+
> |'9...1 '| 9...0  | 9...1       |-- |9...0|9...1        |--  |
> |'9...1x'|'9...1x'|'9...1x'     |OK |9...0|9...1        |--  |
> +--------+--------+-------------+---+-----+-------------+----+

1. Trailing spaces are allowed in perfect INTEGERs as far as are allowed 
in perfect REALs.

2. CAST(TEXT AS NUMERIC) returns INTEGER where REAL gives no better 
results nor a number part of a source string has no REAL look-alike.

Above corrections allow to save up to 10 bits of information at no cost.

3. A side effect is that CAST(TEXT AS INTEGER) returns INTEGER with 
min/max value, if it would be interpreted as a number with a magnitude 
out of range of 64-bit int. I hope, nobody use CAST(TEXT AS INTEGER) as 
a PRNG with a fancy distribution.

According to the ``do not touch this if you do not need to'' rule 
(argued here: 
http://www.mail-archive.com/sqlite-users%40mailinglists.sqlite.org/msg07425.html)
 
no other then previously described (pts 1. to 3.) effects are observed 
nor intended. Especially those requiring a depth refactoring of a type 
system like 
http://www.mail-archive.com/sqlite-users%40mailinglists.sqlite.org/msg07433.html
 
or 
http://www.mail-archive.com/sqlite-users%40mailinglists.sqlite.org/msg07439.html
 
--- the latter is seducing as nothing looks finer then NULL as a result 
of conversion of non-digit trash (like '&a$ksdfrgk*[G') into a number.

In particular:

(a) ``SELECT -'9000000000000000001 ';'' now gives -9000000000000000001 
(according to 1.), while

(b) ``SELECT -'9000000000000000001z';'' still gives -9000000000000000000 
--- this can be avoided by using ``SELECT -CAST('9000000000000000001z' 
AS NUMERIC);'' (according to 2.).

Two function has been changed:

vdbemem.c:sqlite3VdbeMemNumerify():

Change the following lines:
======
     if( 0==sqlite3Atoi64(pMem->z, &pMem->u.i, pMem->n, pMem->enc) ){
       MemSetTypeFlag(pMem, MEM_Int);
     }else{
======
into:
======
     switch ( sqlite3Atoi64(pMem->z, &pMem->u.i, pMem->n, pMem->enc) ) {
       case 0:
       case 3:
         MemSetTypeFlag(pMem, MEM_Int);
         break;
       default:
======

util.c:sqlite3Atoi64() (due to many changes the whole function follows):
======
/*
** Convert zNum to a 64-bit signed integer.  zNum must be decimal. This
** routine does *not* accept hexadecimal notation.
**
** If the zNum value is representable as a 64-bit twos-complement
** integer, then write that value into *pNum and return 0.
**
** If zNum is exactly 9223372036854775808, return 2.  This special
** case is broken out because while 9223372036854775808 cannot be a
** signed 64-bit integer, its negative -9223372036854775808 can be.
**
** If zNum is too big for a 64-bit integer and is not
** 9223372036854775808  or if zNum contains any non-numeric text,
** then return 1. Target *pNum receives value
** < SMALLEST_INT64, LARGEST_INT64 > based on leading digits,
** or 0 if it were no digits.
**
** If it is something wrong with zNum but trying to realify
** will not provide any better results then return 3.
**
** length is the number of bytes in the string (bytes, not characters).
** The string is not necessarily zero-terminated.  The encoding is
** given by enc.
*/
SQLITE_PRIVATE int sqlite3Atoi64(const char *zNum, i64 *pNum, int 
length, u8 enc){
   int incr;
   u64 u = 0;
   int neg = 0; /* assume positive */
   int i;
   int c = 0;
   int nonNum = 0;
   int tooBig = 0;
   int isTrash; /* final zNum[i] points for non-digits */
   int wereDigits; /* prior to final zNum[i] there were digits */
   int hasRealSyntax;
   const char *zStart;
   const char *zEnd = zNum + length;
   assert( enc == SQLITE_UTF8 || enc == SQLITE_UTF16LE || enc == 
SQLITE_UTF16BE );
   if ( enc == SQLITE_UTF8 ) {
     incr = 1;
     /* Skip trailing spaces */
     while ( zNum < zEnd && sqlite3Isspace(zEnd[-1]) ) --zEnd;
   } else {
     incr = 2;
     assert( SQLITE_UTF16LE==2 && SQLITE_UTF16BE==3 );
     for ( i = 3 - enc; i < length && zNum[i] == 0; i += 2) { }
     /* Now i points to non-zero hi-byte */
     nonNum = i < length;
     /* Now nonNum means there are >0xFF univals */
     zEnd = zNum + i;
     /* Skip trailing spaces */
     if ( 0 == nonNum ) while ( zNum < zEnd && sqlite3Isspace(zEnd[-2]) 
) zEnd -= 2;
     zEnd += enc - 3;
     zNum += enc & 1;
   }
   /* Skip leading spaces */
   while ( zNum < zEnd && sqlite3Isspace(*zNum) ) zNum+=incr;
   /* '-' present? */
   if ( zNum < zEnd ) {
     if ( *zNum == '-' ) {
       neg = 1;
       zNum += incr;
     } else if ( *zNum == '+' ) {
       zNum += incr;
     }
   }
   /* zStart points to a positive number part */
   zStart = zNum;
   /* Skip leading zeroes */
   while ( zNum < zEnd && zNum[0] == '0' ) { zNum += incr; }
   for ( i = 0; &zNum[i] < zEnd && (c = zNum[i]) >= '0' && c <= '9'; i 
+= incr ) {
     u = u * 10 + c - '0';
   }

   /* Now:
   **   (1) '-|+'
   **   (2) [zStart] '00000'
   **   (3) [zNum] '20120300230230320843'
   **   (4) [zNum + i] '...'
   **   (5) [zEnd] trailing spaces and/or >U+00FF */

   /* Let's apply valid range to the number */
   if ( i > 19 * incr || u > LARGEST_INT64 ) {
     *pNum = neg ? SMALLEST_INT64 : LARGEST_INT64;
     /* Let's set tooBig if integer part together with leading sign does 
not fit */
     tooBig = i > 19 * incr || 0 == neg || LARGEST_INT64 < u - 1;
   } else if ( neg ) {
     *pNum = -(i64)u;
   } else {
     *pNum = (i64)u;
   }
   testcase( i==18 );
   testcase( i==19 );
   testcase( i==20 );

   isTrash = nonNum || 0 != c && &zNum[i] < zEnd;
   wereDigits = 0 < i || zStart < zNum;

   if ( 0 == isTrash && 0 != wereDigits ) {
     /* Clean and perfect integer so return 0; */
     if ( 0 == tooBig ) return 0;
     /* -SMALLEST_INT64 so, for unary minus, return 2; */
     if ( LARGEST_INT64 == u - 1 ) return 2;
   }

   /* The following part is relevant only for sqlite3VdbeMemNumerify(),
   ** so it can be omitted in other cases
   ** (for example by supplying int nCheckReality parameter, or other). */
   hasRealSyntax = 0 != tooBig || /* too big for 64-bit int, or */
     &zNum[i] < zEnd && (
       '.' == c && ( /* has dot '.'... */
         wereDigits || /* preceded... */
         &zNum[i + 1] < zEnd &&
           '0' <= zNum[i + 1] && /* or followed by digits, or */
           '9' >= zNum[i + 1]
       ) || (
         'e' == c || 'E' == c /* has 'e' or 'E'... */
       ) && (
         &zNum[i + 1] < zEnd && (
           '0' <= zNum[i + 1] && /* followed by digits... */
           '9' >= zNum[i + 1] || (
             '-' == zNum[i + 1] || /* or followed by a sign... */
             '+' == zNum[i + 1]
           ) &&
           &zNum[i + 2] < zEnd &&
           '0' <= zNum[i + 2] && /* and digits */
           '9' >= zNum[i + 2]
         )
       )
     );

   return hasRealSyntax ? 1 : 3;
}
======

-- best regards

Cezary H. Noweta

Reply via email to