Re: [libdbi-users] PATCH: sqlite3 parser bugs
Vikram Ambrose writes: > And here is an update to the patch for a third bug. Better late than never ... I've applied your patch, and things seem to work ok. Thanks for being this patient. regards, Markus -- Markus Hoenicka http://www.mhoenicka.de AQ score 38 -- The ultimate all-in-one performance toolkit: Intel(R) Parallel Studio XE: Pinpoint memory and threading errors before they happen. Find and fix more than 250 security defects in the development cycle. Locate bottlenecks in serial and parallel code that limit performance. http://p.sf.net/sfu/intel-dev2devfeb ___ libdbi-users mailing list libdbi-users@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/libdbi-users
Re: [libdbi-users] PATCH: sqlite3 parser bugs
On 12/25/2010 08:34 PM, Vikram Ambrose wrote: The first issue I had was that the parser was unable to find Tables in the SQL statement. This was because the parser assumed that the only possible white space around keywords was ' ' or 0x20; [...] The second bug was that the parser would always read past the first "endword" if there was a JOIN just before it. Resulting in tables being added to the list that don't exist. (eg. Artist.name, not a table but a field). Thus in order to fix this you need to check for endwords right away if you're parsing through JOINs. And here is an update to the patch for a third bug. The third bug is where the driver is unable to find functions in the SQL query. [...] if ( function_flag == 1 ) { free(statement_copy); strcpy(curr_field,itemstore); [...] "itemstore" is a pointer into "statement_copy", and as such you cannot free it before the strcpy(). The patch moves the free() after the strcpy() and the parser is now able to correctly identify function return types. Again, 380 tests pass, 0 fail. By the way, Merry Christmas and a Happy New Year to everyone. Vikram. Index: libdbi-drivers/drivers/sqlite3/dbd_sqlite3.c === --- libdbi-drivers.orig/drivers/sqlite3/dbd_sqlite3.c 2010-12-25 20:11:38.603387002 -0500 +++ libdbi-drivers/drivers/sqlite3/dbd_sqlite3.c 2010-12-28 04:07:47.433387000 -0500 @@ -771,10 +771,11 @@ //printf("curr_table before getTables = %s\n",curr_table); table_count = getTables(tables,0,statement_copy,curr_table); //printf("curr_table after getTables = %s\n",curr_table); + //printf("%s\n",statement_copy); //printf("*TABLELIST\n"); - // for ( counter = 0 ; counter < table_count ; counter++) { - // //printf("%s\n",tables[counter]); - // } + //for ( counter = 0 ; counter < table_count ; counter++) { + // printf("[%i] %s\n",counter,tables[counter]); + //} // resolve our curr_field to a real column char* token; @@ -872,7 +873,7 @@ } } } - } + } // if item == itemstore else { // we have a function here @@ -928,7 +929,6 @@ return FIELD_TYPE_STRING; } if ( function_flag == 1 ) { - free(statement_copy); // itemstore has at least the functionname( in it strcpy(curr_field,itemstore); strcpy(curr_field_lower, curr_field); @@ -937,7 +937,8 @@ *item = (char)tolower((int)*item); item++; } - //printf("Field is a function - %s\n",curr_field_lower); +//printf("Field is a function - %s\n",curr_field_lower); + free(statement_copy); if ( strstr(curr_field_lower,"avg(") || strstr(curr_field_lower,"sum(") || strstr(curr_field_lower,"total(") || @@ -1180,6 +1181,27 @@ return type; } +/* Similar to C99 strstr() except ensures that there is + * white space around needle. */ +char *strstr_ws(const char *haystack, const char *needle){ + const char *c = NULL; + int len; + + len = strlen(needle); + + c = haystack; + + while( (c = strstr(c,needle)) != NULL){ + if(c == haystack) return NULL; + if( ( *(c-1) == ' ' || *(c-1) == '\t' || *(c-1) == '\n') + && + ( c[len] == ' ' || c[len] == '\t' || c[len] == '\n')) + return (char*) c; + } + + return NULL; +} + int getTables(char** tables, int index, const char* statement, char* curr_table) { //printf("getTables\n"); /* printf("processing %s\n",statement); */ @@ -1195,16 +1217,16 @@ char* endwords[] = {"where","group","having","union","intersect","except","order","limit"}; char* nottables[] = {"natural","left","right","full","outer","inner","cross","join","as","on"}; - if ( !(item = strstr(statement, " from ")) ) { -if ( !(item = strstr(statement, " FROM ")) ) + if ( !(item = strstr_ws(statement, "from")) ) { +if ( !(item = strstr_ws(statement, "FROM")) ) /* printf("no from clause\n"); */ return index; } - item += 6; + item += strlen("from"); while ( *item != '\0' ) { /* printf("begin parsing at %s\n", item); */ -if ( *item == ' ' || *item == ',' ) { +if ( *item == ' '|| *item == '\t' || *item == ',' ) { item++; } else { @@ -1229,7 +1251,8 @@ } else { /* printf("actual word starting here: %s\n", item); */ - while ( *item && *item != ',' && *item != ' ' && *item != ')' && *item != ';' ) { + while ( *item && *item != ',' && *item != ' ' && *item != '\t' + && *item != ')' && *item != ';' ) { item++; } char word[item-start+1]; @@ -1256,11 +1279,11 @@ /* printf("not a table: %s\n", word_lower); */ // if we encounter join or as we set // a flag because we know what to do next - if ( strcmp(nottables[7],word_lower) == 0 ) { + if ( strcmp("join",word_lower) == 0 ) { //printf("join found\n"); join_flag = 1; } - if ( strcmp(nottables[8
[libdbi-users] PATCH: sqlite3 parser bugs
I'm working on a patch for the sqlite3 driver to resolve some of the issues I've been having. The first issue I had was that the parser was unable to find Tables in the SQL statement. This was because the parser assumed that the only possible white space around keywords was ' ' or 0x20; eg. strstr(statement," FROM "); If you had SQL formatted like; SELECT foo\nFROM bar; or SELECT foo\n\tFROM bar; The parser would fail to find 'bar' because there isn't a ' ' character before FROM. This was a problem for all keywords. I think using strtok would have been a better approach than going char by char. But since the parser works in general, I didn't want to rewrite it all. The second bug was that the parser would always read past the first "endword" if there was a JOIN just before it. Resulting in tables being added to the list that don't exist. (eg. Artist.name, not a table but a field). Thus in order to fix this you need to check for endwords right away if you're parsing through JOINs. I'm still having all sorts of issues with the sqlite3 driver but these two things have at least got my application running :) I also removed some magic references to nottables[] and added some comments to denote the end of large statements. I ran make check with the patch and 380 tests passed, 0 failed. Let me know what you think of the patch. On a side note, write performance to sqlite3 db is incredibly poor (>1s for each INSERT regardless of content or no. of fields). But read performance is great. Any tips on how to tune sqlite3? Vikram. Index: libdbi-drivers/drivers/sqlite3/dbd_sqlite3.c === --- libdbi-drivers.orig/drivers/sqlite3/dbd_sqlite3.c 2010-12-25 20:11:38.603387002 -0500 +++ libdbi-drivers/drivers/sqlite3/dbd_sqlite3.c 2010-12-25 20:12:05.273387001 -0500 @@ -771,10 +771,11 @@ //printf("curr_table before getTables = %s\n",curr_table); table_count = getTables(tables,0,statement_copy,curr_table); //printf("curr_table after getTables = %s\n",curr_table); - //printf("*TABLELIST\n"); - // for ( counter = 0 ; counter < table_count ; counter++) { - // //printf("%s\n",tables[counter]); - // } + //printf("%s\n",statement_copy); + printf("*TABLELIST\n"); + for ( counter = 0 ; counter < table_count ; counter++) { + printf("[%i] %s\n",counter,tables[counter]); + } // resolve our curr_field to a real column char* token; @@ -1180,6 +1181,27 @@ return type; } +/* Similar to C99 strstr() except ensures that there is + * white space around needle. */ +char *strstr_ws(const char *haystack, const char *needle){ + const char *c = NULL; + int len; + + len = strlen(needle); + + c = haystack; + + while( (c = strstr(c,needle)) != NULL){ + if(c == haystack) return NULL; + if( ( *(c-1) == ' ' || *(c-1) == '\t' || *(c-1) == '\n') + && + ( c[len] == ' ' || c[len] == '\t' || c[len] == '\n')) + return (char*) c; + } + + return NULL; +} + int getTables(char** tables, int index, const char* statement, char* curr_table) { //printf("getTables\n"); /* printf("processing %s\n",statement); */ @@ -1195,16 +1217,16 @@ char* endwords[] = {"where","group","having","union","intersect","except","order","limit"}; char* nottables[] = {"natural","left","right","full","outer","inner","cross","join","as","on"}; - if ( !(item = strstr(statement, " from ")) ) { -if ( !(item = strstr(statement, " FROM ")) ) + if ( !(item = strstr_ws(statement, "from")) ) { +if ( !(item = strstr_ws(statement, "FROM")) ) /* printf("no from clause\n"); */ return index; } - item += 6; + item += strlen("from"); while ( *item != '\0' ) { /* printf("begin parsing at %s\n", item); */ -if ( *item == ' ' || *item == ',' ) { +if ( *item == ' '|| *item == '\t' || *item == ',' ) { item++; } else { @@ -1229,7 +1251,8 @@ } else { /* printf("actual word starting here: %s\n", item); */ - while ( *item && *item != ',' && *item != ' ' && *item != ')' && *item != ';' ) { + while ( *item && *item != ',' && *item != ' ' && *item != '\t' + && *item != ')' && *item != ';' ) { item++; } char word[item-start+1]; @@ -1256,11 +1279,11 @@ /* printf("not a table: %s\n", word_lower); */ // if we encounter join or as we set // a flag because we know what to do next - if ( strcmp(nottables[7],word_lower) == 0 ) { + if ( strcmp("join",word_lower) == 0 ) { //printf("join found\n"); join_flag = 1; } - if ( strcmp(nottables[8],word_lower) == 0 ) { + if ( strcmp("as",word_lower) == 0 ) { //printf("as found\n"); as_flag = 1; } @@ -1296,7 +1319,7 @@ join_flag = 0; int skip_flag = 1; while ( skip_flag == 1 ) { - if ( *item == ' ') { + if ( *item == ' '|| *it