Passes SQLite's tests, passes Gears tests.  I'll be running it against
some longer tests in parallel to sending it out for review.

While this code may need some thought to understand the context and
see that it doesn't break anything that it shouldn't, I _believe_ that
none of the changes have non-local impact.  So you should be able to
review each function in isolation.

-scott


-------------------

Hello michaeln,

I'd like you to do a code review.  Please execute
        g4 diff -c 8305435

or point your web browser to
        http://mondrian/8305435

to review the following code:

Change 8305435 by [EMAIL PROTECTED] on 2008/09/17 17:26:04 *pending*

        Detect and handle certain corruption cases for fts2,
        concentrating on high-level structural issues not trying to
        detect corruption when decoding, for now.  These cases handle
        all the in-the-wild corruption examples I have but one.
        
        - During a query, detect when the fts index references a docid
          which doesn't exist in the content table.
        
        - Detect when leavesReaderInit() receives an empty leaf node,
          or a leaf node which begins with a character other than nul.
          Similar mod to leavesReaderStep().
        
        - Similar changes to interior-node handling in
          loadAndGetChildrenContaining(), and loadSegment().
        
        - In various places detects if values of the wrong type are
          received from SQL queries.  I have audited the code, and all
          stored values are bound using a type appropriate to their
          column, so any mismatch means that we are either reading
          random data, or valid SQLite data from a page which wasn't
          previously part of an fts table (as if a page from another
          table were re-used).
        
        PRESUBMIT=passed
        R=michaeln
        [EMAIL PROTECTED]
        DELTA=136  (115 added, 3 deleted, 18 changed)
        OCL=8305435

Affected files ...

... 
//depot/googleclient/gears/opensource/third_party/sqlite_google/ext/fts2/fts2.c#5
 edit

136 delta lines: 115 added, 3 deleted, 18 changed

If you can't do the review, please let me know as soon as possible.  During
your review, please ensure that all new code has corresponding unit tests and
that existing unit tests are updated appropriately.  Visit
http://www/eng/code_review.html for more information.

This is a semiautomated message from "g4 mail".  Complaints or suggestions?
Mail [EMAIL PROTECTED]
Change 8305435 by [EMAIL PROTECTED] on 2008/09/17 17:26:04 *pending*

        Detect and handle certain corruption cases for fts2,
        concentrating on high-level structural issues not trying to
        detect corruption when decoding, for now.  These cases handle
        all the in-the-wild corruption examples I have but one.
        
        - During a query, detect when the fts index references a docid
          which doesn't exist in the content table.
        
        - Detect when leavesReaderInit() receives an empty leaf node,
          or a leaf node which begins with a character other than nul.
          Similar mod to leavesReaderStep().
        
        - Similar changes to interior-node handling in
          loadAndGetChildrenContaining(), and loadSegment().
        
        - In various places detects if values of the wrong type are
          received from SQL queries.  I have audited the code, and all
          stored values are bound using a type appropriate to their
          column, so any mismatch means that we are either reading
          random data, or valid SQLite data from a page which wasn't
          previously part of an fts table (as if a page from another
          table were re-used).
        
        OCL=8305435

Affected files ...

... 
//depot/googleclient/gears/opensource/third_party/sqlite_google/ext/fts2/fts2.c#5
 edit

==== 
//depot/googleclient/gears/opensource/third_party/sqlite_google/ext/fts2/fts2.c#5
 - 
/home/shess/googlesrc/gears_import/googleclient/gears/opensource/third_party/sqlite_google/ext/fts2/fts2.c
 ====
# action=edit type=text
--- googleclient/gears/opensource/third_party/sqlite_google/ext/fts2/fts2.c     
2008-09-15 15:33:02.000000000 -0700
+++ googleclient/gears/opensource/third_party/sqlite_google/ext/fts2/fts2.c     
2008-09-17 15:52:41.000000000 -0700
@@ -347,6 +347,16 @@
 # define TRACE(A)  printf A; fflush(stdout)
 #else
 # define TRACE(A)
+#endif
+
+#if 0
+/* Useful to set breakpoints.  See main.c sqlite3Corrupt(). */
+static int fts2Corrupt(void){
+  return SQLITE_CORRUPT;
+}
+# define SQLITE_CORRUPT_BKPT fts2Corrupt()
+#else
+# define SQLITE_CORRUPT_BKPT SQLITE_CORRUPT
 #endif
 
 /* It is not safe to call isspace(), tolower(), or isalnum() on
@@ -3435,8 +3445,11 @@
       c->eof = 0;
       return SQLITE_OK;
     }
-    /* an error occurred; abort */
-    return rc==SQLITE_DONE ? SQLITE_ERROR : rc;
+
+    /* Corrupt if the index refers to missing document. */
+    if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT;
+
+    return rc;
   }
 }
 
@@ -5091,6 +5104,9 @@
 ** the leaf data was entirely contained in the root), or from the
 ** stream of blocks between iStartBlockid and iEndBlockid, inclusive.
 */
+/* TODO(shess): Figure out a means of indicating how many leaves are
+** expected, for purposes of detecting corruption.
+*/
 static int leavesReaderInit(fulltext_vtab *v,
                             int idx,
                             sqlite_int64 iStartBlockid,
@@ -5102,6 +5118,10 @@
 
   dataBufferInit(&pReader->rootData, 0);
   if( iStartBlockid==0 ){
+    /* Corrupt if this can't be a leaf node. */
+    if( pRootData==NULL || nRootData<1 || pRootData[0]!='\0' ){
+      return SQLITE_CORRUPT_BKPT;
+    }
     /* Entire leaf level fit in root data. */
     dataBufferReplace(&pReader->rootData, pRootData, nRootData);
     leafReaderInit(pReader->rootData.pData, pReader->rootData.nData,
@@ -5112,22 +5132,48 @@
     if( rc!=SQLITE_OK ) return rc;
 
     rc = sqlite3_bind_int64(s, 1, iStartBlockid);
-    if( rc!=SQLITE_OK ) return rc;
+    if( rc!=SQLITE_OK ) goto err;
 
     rc = sqlite3_bind_int64(s, 2, iEndBlockid);
-    if( rc!=SQLITE_OK ) return rc;
+    if( rc!=SQLITE_OK ) goto err;
 
     rc = sqlite3_step(s);
+
+    /* Corrupt if interior node referenced missing leaf node. */
     if( rc==SQLITE_DONE ){
-      pReader->eof = 1;
-      return SQLITE_OK;
-    }
-    if( rc!=SQLITE_ROW ) return rc;
+      rc = SQLITE_CORRUPT_BKPT;
+      goto err;
+    }
+
+    if( rc!=SQLITE_ROW ) goto err;
+    rc = SQLITE_OK;
+
+    /* Corrupt if leaf data isn't a blob. */
+    if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){
+      rc = SQLITE_CORRUPT_BKPT;
+    }else{
+      const char *pLeafData = sqlite3_column_blob(s, 0);
+      int nLeafData = sqlite3_column_bytes(s, 0);
+
+      /* Corrupt if this can't be a leaf node. */
+      if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){
+        rc = SQLITE_CORRUPT_BKPT;
+      }else{
+        leafReaderInit(pLeafData, nLeafData, &pReader->leafReader);
+      }
+    }
+
+ err:
+    if( rc!=SQLITE_OK ){
+      if( idx==-1 ){
+        sqlite3_finalize(s);
+      }else{
+        sqlite3_reset(s);
+      }
+      return rc;
+    }
 
     pReader->pStmt = s;
-    leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0),
-                   sqlite3_column_bytes(pReader->pStmt, 0),
-                   &pReader->leafReader);
   }
   return SQLITE_OK;
 }
@@ -5150,10 +5196,22 @@
       pReader->eof = 1;
       return rc==SQLITE_DONE ? SQLITE_OK : rc;
     }
-    leafReaderDestroy(&pReader->leafReader);
-    leafReaderInit(sqlite3_column_blob(pReader->pStmt, 0),
-                   sqlite3_column_bytes(pReader->pStmt, 0),
-                   &pReader->leafReader);
+
+    /* Corrupt if leaf data isn't a blob. */
+    if( sqlite3_column_type(pReader->pStmt, 0)!=SQLITE_BLOB ){
+      return SQLITE_CORRUPT_BKPT;
+    }else{
+      const char *pLeafData = sqlite3_column_blob(pReader->pStmt, 0);
+      int nLeafData = sqlite3_column_bytes(pReader->pStmt, 0);
+
+      /* Corrupt if this can't be a leaf node. */
+      if( pLeafData==NULL || nLeafData<1 || pLeafData[0]!='\0' ){
+        return SQLITE_CORRUPT_BKPT;
+      }
+
+      leafReaderDestroy(&pReader->leafReader);
+      leafReaderInit(pLeafData, nLeafData, &pReader->leafReader);
+    }
   }
   return SQLITE_OK;
 }
@@ -5215,6 +5273,14 @@
     const char *pRootData = sqlite3_column_blob(s, 2);
     int nRootData = sqlite3_column_bytes(s, 2);
 
+    /* Corrupt if we get back different types than we stored. */
+    if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER ||
+        sqlite3_column_type(s, 1)!=SQLITE_INTEGER ||
+        sqlite3_column_type(s, 2)!=SQLITE_BLOB ){
+      rc = SQLITE_CORRUPT_BKPT;
+      break;
+    }
+
     assert( i<MERGE_COUNT );
     rc = leavesReaderInit(v, i, iStart, iEnd, pRootData, nRootData,
                           &pReaders[i]);
@@ -5226,6 +5292,7 @@
     while( i-->0 ){
       leavesReaderDestroy(&pReaders[i]);
     }
+    sqlite3_reset(s);          /* So we don't leave a lock. */
     return rc;
   }
 
@@ -5602,11 +5669,27 @@
   if( rc!=SQLITE_OK ) return rc;
 
   rc = sqlite3_step(s);
-  if( rc==SQLITE_DONE ) return SQLITE_ERROR;
+  /* Corrupt if interior node references missing child node. */
+  if( rc==SQLITE_DONE ) return SQLITE_CORRUPT_BKPT;
   if( rc!=SQLITE_ROW ) return rc;
 
-  getChildrenContaining(sqlite3_column_blob(s, 0), sqlite3_column_bytes(s, 0),
-                        pTerm, nTerm, isPrefix, piStartChild, piEndChild);
+  /* Corrupt if child node isn't a blob. */
+  if( sqlite3_column_type(s, 0)!=SQLITE_BLOB ){
+    sqlite3_reset(s);         /* So we don't leave a lock. */
+    return SQLITE_CORRUPT_BKPT;
+  }else{
+    const char *pData = sqlite3_column_blob(s, 0);
+    int nData = sqlite3_column_bytes(s, 0);
+
+    /* Corrupt if child is not a valid interior node. */
+    if( pData==NULL || nData<1 || pData[0]=='\0' ){
+      sqlite3_reset(s);         /* So we don't leave a lock. */
+      return SQLITE_CORRUPT_BKPT;
+    }
+
+    getChildrenContaining(pData, nData, pTerm, nTerm,
+                          isPrefix, piStartChild, piEndChild);
+  }
 
   /* We expect only one row.  We must execute another sqlite3_step()
    * to complete the iteration; otherwise the table will remain
@@ -5689,7 +5772,8 @@
   DataBuffer result;
   int rc;
 
-  assert( nData>1 );
+  /* Corrupt if segment root can't be valid. */
+  if( pData==NULL || nData<1 ) return SQLITE_CORRUPT_BKPT;
 
   /* This code should never be called with buffered updates. */
   assert( v->nPendingData<0 );
@@ -5743,6 +5827,14 @@
     const char *pData = sqlite3_column_blob(s, 2);
     const int nData = sqlite3_column_bytes(s, 2);
     const sqlite_int64 iLeavesEnd = sqlite3_column_int64(s, 1);
+
+    /* Corrupt if we get back different types than we stored. */
+    if( sqlite3_column_type(s, 1)!=SQLITE_INTEGER ||
+        sqlite3_column_type(s, 2)!=SQLITE_BLOB ){
+      rc = SQLITE_CORRUPT_BKPT;
+      goto err;
+    }
+
     rc = loadSegment(v, pData, nData, iLeavesEnd, pTerm, nTerm, isPrefix,
                      &doclist);
     if( rc!=SQLITE_OK ) goto err;
@@ -5762,6 +5854,7 @@
   }
 
  err:
+  sqlite3_reset(s);         /* So we don't leave a lock. */
   dataBufferDestroy(&doclist);
   return rc;
 }
@@ -6254,6 +6347,14 @@
       const char *pRootData = sqlite3_column_blob(s, 2);
       int nRootData = sqlite3_column_bytes(s, 2);
 
+      /* Corrupt if we get back different types than we stored. */
+      if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER ||
+          sqlite3_column_type(s, 1)!=SQLITE_INTEGER ||
+          sqlite3_column_type(s, 2)!=SQLITE_BLOB ){
+        rc = SQLITE_CORRUPT_BKPT;
+        break;
+      }
+
       assert( i<nReaders );
       rc = leavesReaderInit(v, -1, iStart, iEnd, pRootData, nRootData,
                             &readers[i].reader);
@@ -6267,6 +6368,8 @@
     if( rc==SQLITE_DONE ){
       assert( i==nReaders );
       rc = optimizeInternal(v, readers, nReaders, &writer);
+    }else{
+      sqlite3_reset(s);      /* So we don't leave a lock. */
     }
 
     while( i-- > 0 ){
@@ -6330,9 +6433,18 @@
   const sqlite_int64 iEndBlockid = sqlite3_column_int64(s, 1);
   const char *pRootData = sqlite3_column_blob(s, 2);
   const int nRootData = sqlite3_column_bytes(s, 2);
+  int rc;
   LeavesReader reader;
-  int rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid,
-                            pRootData, nRootData, &reader);
+
+  /* Corrupt if we get back different types than we stored. */
+  if( sqlite3_column_type(s, 0)!=SQLITE_INTEGER ||
+      sqlite3_column_type(s, 1)!=SQLITE_INTEGER ||
+      sqlite3_column_type(s, 2)!=SQLITE_BLOB ){
+    return SQLITE_CORRUPT_BKPT;
+  }
+
+  rc = leavesReaderInit(v, 0, iStartBlockid, iEndBlockid,
+                        pRootData, nRootData, &reader);
   if( rc!=SQLITE_OK ) return rc;
 
   while( rc==SQLITE_OK && !leavesReaderAtEnd(&reader) ){

Reply via email to