I've found a small issue in the sqlite3Utf8CharLen function.
If a UTF-8 sequence is at the end of the buffer, and the buffer is not
zero-terminated, it will over-read by one or
more bytes (within the \x80-\xbf range).

E.g.

        const int psz = getpagesize();
        char *p;
        p = 
mmap(NULL,2*psz,PROT_READ|PROT_WRITE,MAP_ANONYMOUS|MAP_PRIVATE,-1,0);
        mprotect(p+psz,psz, PROT_NONE); /* guard page */
        
        memset(p, ' ', psz);
        p[psz-2]=0xd1;
        p[psz-1]=0x8e;
        sqlite3Utf8CharLen(p, psz); // BANG. Over-read by one byte.
        memset(p, 0x80, psz);
        p[0]=0xc0; /* invalid overlong utf-8 sequence */
        sqlite3Utf8CharLen(p, psz/2); // BANG. Over-read by psz/2+1 bytes.

It does *not* look exploitable within the sqlite library (it is either called
with -1, or with a zero-terminated buffer that is one [or more] bytes larger),
and this function is internal and not exported, so it is likely not a terribly
big matter, but at least it should be noted in comments (in case someone
carelessly borrows it, as happened [with an older and even more
problematic version of this code] in
http://sqlite.org/contrib//download/extension-functions.c?get=25
fwiw, there it *is* exploitable, and should be updated:

        const char *sql = "SELECT padl(?,2)";
        int rc;
        char *err;
        sqlite3 *db;
        sqlite3_stmt *st;
        rc = sqlite3_open(NULL,&db);
        rc = sqlite3_load_extension(db,"./extension-functions.so",NULL,&err);
        rc = sqlite3_prepare_v2(db,sql,-1,&st,NULL);
        memset(p, ' ', psz);
        p[psz-2]=0xef;
        p[psz-1]=0;
        rc = sqlite3_bind_text(st,1,p,-1,SQLITE_STATIC);
        rc = sqlite3_step(st); // BANG.

[and it is not *only* problem with this extension]).

BTW, this UTF-8-related code is copy-pasted a dozen or so times across
various sqlite extensions; it would be a good idea to reduce the code
duplication, as sooner or later one of the copies will be overlooked after
an update (as happened with extension-functions.c).

============================================================================

diff --git a/extension-functions.c b/extension-functions.c
--- a/extension-functions.c
+++ b/extension-functions.c
@@ -127,6 +127,10 @@ SQLITE_EXTENSION_INIT1
@@ -155,82 +159,61 @@ static char *sqlite3StrDup( const char *z ) {
 */
 
 /* LMH from sqlite3 3.3.13 */
-/*
-** This table maps from the first byte of a UTF-8 character to the number
-** of trailing bytes expected. A value '4' indicates that the table key
-** is not a legal first byte for a UTF-8 character.
-*/
-static const u8 xtra_utf8_bytes[256]  = {
-/* 0xxxxxxx */
-0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
-0, 0, 0, 0, 0, 0, 0, 0,     0, 0, 0, 0, 0, 0, 0, 0,
-
-/* 10wwwwww */
-4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
-4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
-4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
-4, 4, 4, 4, 4, 4, 4, 4,     4, 4, 4, 4, 4, 4, 4, 4,
-
-/* 110yyyyy */
-1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
-1, 1, 1, 1, 1, 1, 1, 1,     1, 1, 1, 1, 1, 1, 1, 1,
-
-/* 1110zzzz */
-2, 2, 2, 2, 2, 2, 2, 2,     2, 2, 2, 2, 2, 2, 2, 2,
-
-/* 11110yyy */
-3, 3, 3, 3, 3, 3, 3, 3,     4, 4, 4, 4, 4, 4, 4, 4,
-};
-
+/* YK updated to sqlite3 3.12.2 */
 
 /*
-** This table maps from the number of trailing bytes in a UTF-8 character
-** to an integer constant that is effectively calculated for each character
-** read by a naive implementation of a UTF-8 character reader. The code
-** in the READ_UTF8 macro explains things best.
+** This lookup table is used to help decode the first byte of
+** a multi-byte UTF8 character.
 */
-static const int xtra_utf8_bits[] =  {
-  0,
-  12416,          /* (0xC0 << 6) + (0x80) */
-  925824,         /* (0xE0 << 12) + (0x80 << 6) + (0x80) */
-  63447168        /* (0xF0 << 18) + (0x80 << 12) + (0x80 << 6) + 0x80 */
+static const unsigned char sqlite3Utf8Trans1[] = {
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x10, 0x11, 0x12, 0x13, 0x14, 0x15, 0x16, 0x17,
+  0x18, 0x19, 0x1a, 0x1b, 0x1c, 0x1d, 0x1e, 0x1f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x08, 0x09, 0x0a, 0x0b, 0x0c, 0x0d, 0x0e, 0x0f,
+  0x00, 0x01, 0x02, 0x03, 0x04, 0x05, 0x06, 0x07,
+  0x00, 0x01, 0x02, 0x03, 0x00, 0x01, 0x00, 0x00,
 };
 
+/* LMH salvaged from sqlite3 3.3.13 source code src/utf.c */
+/* YK updated to sqlite3 3.12.2 */
+
 /*
-** If a UTF-8 character contains N bytes extra bytes (N bytes follow
-** the initial byte so that the total character length is N+1) then
-** masking the character with utf8_mask[N] must produce a non-zero
-** result.  Otherwise, we have an (illegal) overlong encoding.
+** Translate a single UTF-8 character.  Return the unicode value.
+**
+** Write a pointer to the next unread byte back into *pzNext.
+**
+** Notes On Invalid UTF-8:
+**
+**  *  This routine never allows a 7-bit character (0x00 through 0x7f) to
+**     be encoded as a multi-byte character.  Any multi-byte character that
+**     attempts to encode a value between 0x00 and 0x7f is rendered as 0xfffd.
+**
+**  *  This routine never allows a UTF16 surrogate value to be encoded.
+**     If a multi-byte character attempts to encode a value between
+**     0xd800 and 0xe000 then it is rendered as 0xfffd.
+**
+**  *  Bytes in the range of 0x80 through 0xbf which occur as the first
+**     byte of a character are interpreted as single-byte characters
+**     and rendered as themselves even though they are technically
+**     invalid characters.
+**
+**  *  This routine accepts over-length UTF8 encodings
+**     for unicode values 0x80 and greater.  It does not change over-length
+**     encodings to 0xfffd as some systems recommend.
 */
-static const int utf_mask[] = {
-  0x00000000,
-  0xffffff80,
-  0xfffff800,
-  0xffff0000,
-};
-
-/* LMH salvaged from sqlite3 3.3.13 source code src/utf.c */
-#define READ_UTF8(zIn, c) { \
-  int xtra;                                            \
-  c = *(zIn)++;                                        \
-  xtra = xtra_utf8_bytes[c];                           \
-  switch( xtra ){                                      \
-    case 4: c = (int)0xFFFD; break;                    \
-    case 3: c = (c<<6) + *(zIn)++;                     \
-    case 2: c = (c<<6) + *(zIn)++;                     \
-    case 1: c = (c<<6) + *(zIn)++;                     \
-    c -= xtra_utf8_bits[xtra];                         \
-    if( (utf_mask[xtra]&c)==0                          \
-        || (c&0xFFFFF800)==0xD800                      \
-        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }    \
-  }                                                    \
-}
+#define READ_UTF8(zIn, c)                                  \
+  c = *(zIn++);                                            \
+  if( c>=0xc0 ){                                           \
+    c = sqlite3Utf8Trans1[c-0xc0];                         \
+    while( (*zIn & 0xc0)==0x80 ){                          \
+      c = (c<<6) + (0x3f & *(zIn++));                      \
+    }                                                      \
+    if( c<0x80                                             \
+        || (c&0xFFFFF800)==0xD800                          \
+        || (c&0xFFFFFFFE)==0xFFFE ){  c = 0xFFFD; }        \
+  }
 
 static int sqlite3ReadUtf8(const unsigned char *z){
   int c;
@@ -238,8 +221,10 @@ static int sqlite3ReadUtf8(const unsigned char *z){
   return c;
 }
 
-#define SKIP_UTF8(zIn) {                               \
-  zIn += (xtra_utf8_bytes[*(u8 *)zIn] + 1);            \
+#define SKIP_UTF8(zIn,zTerm) {                                  \
+  if( ((unsigned char)*(zIn++))>=0xc0 ){                        \
+    while( zIn != zTerm && (*zIn & 0xc0)==0x80 ){ zIn++; }      \
+  }                                                             \
 }
 
 /*
@@ -259,7 +244,7 @@ static int sqlite3Utf8CharLen(const char *z, int nByte){
   }
   assert( z<=zTerm );
   while( *z!=0 && z<zTerm ){
-    SKIP_UTF8(z);
+    SKIP_UTF8(z,zTerm);
     r++;
   }
   return r;

_______________________________________________
sqlite-users mailing list
sqlite-users@mailinglists.sqlite.org
http://mailinglists.sqlite.org/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to