Hi, I think I may have discovered a bug in SQLite3. It is a timing bug that
is easily reproduced with my (unfortunately proprietary) code. I'll try to
describe the situation as best as possible. This is using SQLite source
from http://www.sqlite.org/sqlite-source-3_5_9.zip running on Mac OS X 10.5
Leopard Intel. We compile and link in our own version of SQLite directly
with our executable. We compile with the following SQLite flags:
-DSQLITE_DEBUG=1 \
-DSQLITE_THREADSAFE=1 \

I have many threads each calling sqlite3_prepare() at the same time for the
same table in the same database. If the timing works out, I get a check
failure.

Here is a (sanitized) backtrace of the failing thread from gdb:

#0  0x9658b2ee in __semwait_signal_nocancel ()
#1  0x9658aed2 in nanosleep$NOCANCEL$UNIX2003 ()
#2  0x9657d1b7 in usleep$NOCANCEL$UNIX2003 ()
#3  0x9659e9bb in abort ()
#4  0x0035fa9d in __eprintf (string=0x3c <Address 0x3c out of bounds>,
expression=0x3c <Address 0x3c out of bounds>, line=60, filename=0x3c
<Address 0x3c out of bounds>) at
/var/tmp/gcc/gcc-5465~16/src/gcc/libgcc2.c:1838
#5  0x0030a780 in sqlite3FaultEndBenign (id=0) at
.../path/to/sqlite/v3_5_9/files/fault.c:120 (see NOTE below)
#6  0x0033f7d5 in sqlite3VdbeExec (p=0xa4d938) at
.../path/to/sqlite/v3_5_9/files/vdbe.c:576
#7  0x0034ccaa in sqlite3Step (p=0xa4d938) at
.../path/to/sqlite/v3_5_9/files/vdbeapi.c:477
#8  0x0034cfe8 in sqlite3_step (pStmt=0xa4d938) at
.../path/to/sqlite/v3_5_9/files/vdbeapi.c:539
#9  0x00312aa0 in sqlite3_exec (db=0xa25188, zSql=0x1052c08 "CREATE TABLE
<table name> (id VARCHAR(128) primary key, <other fields>"..., xCallback=0,
pArg=0x0, pzErrMsg=0xb0101a58) at
.../path/to/sqlite/v3_5_9/files/legacy.c:72
#10 0x0032a5d8 in sqlite3InitCallback (pInit=0xb0101b40, argc=3,
argv=0xa47d74, azColName=0xa47d68) at
.../path/to/sqlite/v3_5_9/files/prepare.c:81
#11 0x00312bf8 in sqlite3_exec (db=0xa25188, zSql=0xa44808 "SELECT name,
rootpage, sql FROM 'main'.sqlite_master", xCallback=0x32a375
<sqlite3InitCallback>, pArg=0xb0101b40, pzErrMsg=0x0) at
.../path/to/sqlite/v3_5_9/files/legacy.c:103
#12 0x0032adcb in sqlite3InitOne (db=0xa25188, iDb=0, pzErrMsg=0xb0102174)
at .../path/to/sqlite/v3_5_9/files/prepare.c:326
#13 0x0032afc1 in sqlite3Init (db=0xa25188, pzErrMsg=0xb0102174) at
.../path/to/sqlite/v3_5_9/files/prepare.c:394
#14 0x0032b106 in sqlite3ReadSchema (pParse=0xb010216c) at
.../path/to/sqlite/v3_5_9/files/prepare.c:430
#15 0x002f606f in sqlite3LocateTable (pParse=0xb010216c, isView=0,
zName=0xa382b8 "<some table name>", zDbase=0x0) at
.../path/to/sqlite/v3_5_9/files/build.c:307
#16 0x0033102e in prepSelectStmt (pParse=0xb010216c, p=0xa428c8) at
.../path/to/sqlite/v3_5_9/files/select.c:1334
#17 0x0033465b in sqlite3SelectResolve (pParse=0xb010216c, p=0xa428c8,
pOuterNC=0x0) at .../path/to/sqlite/v3_5_9/files/select.c:2736
#18 0x00334fcc in sqlite3Select (pParse=0xb010216c, p=0xa428c8,
pDest=0xb0101fe0, pParent=0x0, parentTab=0, pParentAgg=0x0, aff=0x0) at
.../path/to/sqlite/v3_5_9/files/select.c:3079
#19 0x00323ef9 in yy_reduce (yypParser=0x1042c08, yyruleno=104) at
parse.y:364
#20 0x003267d5 in sqlite3Parser (yyp=0x1042c08, yymajor=1, yyminor={z =
0x10409c3 " ", dyn = 0, n = 1}, pParse=0xb010216c) at parse.c:3606
#21 0x00337d82 in sqlite3RunParser (pParse=0xb010216c, zSql=0x1040608
"SELECT <some fields>"..., pzErrMsg=0xb0102168) at
.../path/to/sqlite/v3_5_9/files/tokenize.c:454
#22 0x0032b63e in sqlite3Prepare (db=0xa25188, zSql=0x103d60c "SELECT <some
fields>"..., nBytes=956, saveSqlFlag=0, ppStmt=0xb0102410,
pzTail=0xb010240c) at .../path/to/sqlite/v3_5_9/files/prepare.c:565
#23 0x0032bb68 in sqlite3LockAndPrepare (db=0xa25188, zSql=0x103d60c "SELECT
<some fields>"..., nBytes=956, saveSqlFlag=0, ppStmt=0xb0102410,
pzTail=0xb010240c) at .../path/to/sqlite/v3_5_9/files/prepare.c:653
#24 0x0032bdca in sqlite3_prepare (db=0xa25188, zSql=0x103d60c "SELECT <some
fields>"..., nBytes=956, ppStmt=0xb0102410, pzTail=0xb010240c) at
.../path/to/sqlite/v3_5_9/files/prepare.c:709
#25.. (functions in our software)
NOTE: in frame #5 gdb says id is 0. it was passed in as -1, but the function
modified it to 0 before the backtrace was printed

I added some logging to investigate how this happens, and found
that sqlite3FaultBeginBenign() and sqlite3FaultEndBenign() are being called
with -1 as the only argument by many threads simultaneously.

I added code to log entrances and exits to these functions with a negative
argument is passed, and to occasionally sleep() in sqlite3FaultEndBenign()
and to dump the last 20 log messages to console occasionally. I found this
contiguous sequence of events:

— thread id 12803 enters sqlite3FaultEndBenign()
— thread id 12803 exits sqlite3FaultEndBenign()
— thread id 13059 exits sqlite3FaultEndBenign()

This is a bug, because one thread (id 13059) is
inside sqlite3FaultEndBenign() while another thread (id 12803) calls the
same function. Inside sqlite3FaultEndBenign(), a static variable, aFault, is
accessed (operator--, which i believe is read,modify,write) without the
protection of a mutex.

I would like to help those with more sqlite smarts than I fix this bug
quickly, so I would be happy to help out with anything on my end.

Thanks for your time,
-andrew

For the curious, here is my log-enabled versions of the functions:

void LogFaultBegin(int x);
void LogFaultBegin2(int x);
void sqlite3FaultBeginBenign(int id){
  if( id<0 ){
    LogFaultBegin(id);  // Log entry
    for(id=0; id<SQLITE_FAULTINJECTOR_COUNT; id++){
      aFault[id].benign++;
    }
    LogFaultBegin2(id);  // Log exit
  }else{
    assert( id>=0 && id<SQLITE_FAULTINJECTOR_COUNT );
    aFault[id].benign++;
  }
}
void LogFaultEnd(int x);
void LogFaultEnd2(int x);
//void LogFaultAssert(int x);
int MaybeSleep(void);
void sqlite3FaultEndBenign(int id){
  int slept;
  if( id<0 ){
    LogFaultEnd(id);  // Log entry
    for(id=0; id<SQLITE_FAULTINJECTOR_COUNT; id++){
//      LogFaultAssert( aFault[id].benign>0 );
      assert( aFault[id].benign>0 );
      aFault[id].benign--;
    }
    slept = MaybeSleep();  // may sleep
    LogFaultEnd2(id);  // log exit
    if (slept)
      SqliteLogDump();  // if we slept, dump these log messages
  }else{
    assert( id>=0 && id<SQLITE_FAULTINJECTOR_COUNT );
    assert( aFault[id].benign>0 );
    aFault[id].benign--;
  }
}
_______________________________________________
sqlite-users mailing list
sqlite-users@sqlite.org
http://sqlite.org:8080/cgi-bin/mailman/listinfo/sqlite-users

Reply via email to