Re: [libdbi-users] PATCH: sqlite3 parser bugs

2011-02-20 Thread markus . hoenicka
Vikram Ambrose writes:
  And here is an update to the patch for a third bug.

Better late than never ... I've applied your patch, and things seem to
work ok. Thanks for being this patient.

regards,
Markus

-- 
Markus Hoenicka
http://www.mhoenicka.de
AQ score 38

--
The ultimate all-in-one performance toolkit: Intel(R) Parallel Studio XE:
Pinpoint memory and threading errors before they happen.
Find and fix more than 250 security defects in the development cycle.
Locate bottlenecks in serial and parallel code that limit performance.
http://p.sf.net/sfu/intel-dev2devfeb
___
libdbi-users mailing list
libdbi-users@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/libdbi-users


Re: [libdbi-users] PATCH: sqlite3 parser bugs

2010-12-28 Thread Vikram Ambrose

On 12/25/2010 08:34 PM, Vikram Ambrose wrote:
The first issue I had was that the parser was unable to find Tables in 
the SQL statement. This was because the parser assumed that the only 
possible white space around keywords was ' ' or 0x20;


[...]

The second bug was that the parser would always read past the first 
endword if there was a JOIN just before it. Resulting in tables 
being added to the list that don't exist. (eg. Artist.name, not a 
table but a field). Thus in order to fix this you need to check for 
endwords right away if you're parsing through JOINs.





And here is an update to the patch for a third bug.

The third bug is where the driver is unable to find functions in the SQL 
query.


[...]
if ( function_flag == 1 ) {
   free(statement_copy);
   strcpy(curr_field,itemstore);
[...]

itemstore is a pointer into statement_copy, and as such you cannot 
free it before the strcpy().


The patch moves the free() after the strcpy() and the parser is now able 
to correctly identify function return types.


Again, 380 tests pass, 0 fail.

By the way, Merry Christmas and a Happy New Year to everyone.


Vikram.
Index: libdbi-drivers/drivers/sqlite3/dbd_sqlite3.c
===
--- libdbi-drivers.orig/drivers/sqlite3/dbd_sqlite3.c	2010-12-25 20:11:38.603387002 -0500
+++ libdbi-drivers/drivers/sqlite3/dbd_sqlite3.c	2010-12-28 04:07:47.433387000 -0500
@@ -771,10 +771,11 @@
   //printf(curr_table before getTables = %s\n,curr_table);
   table_count = getTables(tables,0,statement_copy,curr_table);
   //printf(curr_table after getTables = %s\n,curr_table);
+  //printf(%s\n,statement_copy);
   //printf(*TABLELIST\n);
-  //		for ( counter = 0 ; counter  table_count ; counter++) {
-  //			//printf(%s\n,tables[counter]);
-  //		}
+  //for ( counter = 0 ; counter  table_count ; counter++) {
+  //   printf([%i] %s\n,counter,tables[counter]);
+  //}
 
   // resolve our curr_field to a real column
   char* token;
@@ -872,7 +873,7 @@
 		  }
 		}
 	  }
-	}
+	} // if item == itemstore
 
 	else {
 	  // we have a function here
@@ -928,7 +929,6 @@
 	return FIELD_TYPE_STRING;
 	  }
 	  if ( function_flag == 1 ) {
-	free(statement_copy);
 	// itemstore has at least the functionname( in it
 	strcpy(curr_field,itemstore);
 	strcpy(curr_field_lower, curr_field);
@@ -937,7 +937,8 @@
 	  *item = (char)tolower((int)*item);
 	  item++;
 	}
-	//printf(Field is a function - %s\n,curr_field_lower);
+//printf(Field is a function - %s\n,curr_field_lower);
+	free(statement_copy);
 	if ( strstr(curr_field_lower,avg() ||
 		 strstr(curr_field_lower,sum() ||
 		 strstr(curr_field_lower,total() ||
@@ -1180,6 +1181,27 @@
   return type;
 }
 
+/* Similar to C99 strstr() except ensures that there is
+ * white space around needle. */
+char *strstr_ws(const char *haystack, const char *needle){
+   const char *c = NULL;
+   int len;
+
+   len = strlen(needle);
+
+   c = haystack;
+
+   while( (c = strstr(c,needle)) != NULL){
+  if(c == haystack) return NULL;
+  if( ( *(c-1)  == ' ' || *(c-1) == '\t' || *(c-1) == '\n')
+ 
+  ( c[len] == ' '  || c[len] == '\t' || c[len] == '\n'))
+   return (char*) c;
+   }
+
+   return NULL;
+}
+
 int getTables(char** tables, int index, const char* statement, char* curr_table) {
   //printf(getTables\n);
 /*   printf(processing %s\n,statement); */
@@ -1195,16 +1217,16 @@
   char* endwords[] = {where,group,having,union,intersect,except,order,limit};
   char* nottables[] = {natural,left,right,full,outer,inner,cross,join,as,on};
 
-  if ( !(item = strstr(statement,  from )) ) {
-if ( !(item = strstr(statement,  FROM )) )
+  if ( !(item = strstr_ws(statement, from)) ) {
+if ( !(item = strstr_ws(statement, FROM)) )
 /*   printf(no from clause\n); */
   return index;
   }
-  item += 6;
+  item += strlen(from);
 
   while ( *item != '\0' ) {
 /* printf(begin parsing at %s\n, item); */
-if ( *item == ' ' || *item == ',' ) {
+if ( *item == ' '|| *item == '\t' || *item == ',' ) {
   item++;
 }
 else {
@@ -1229,7 +1251,8 @@
   }
   else {
 /* 	printf(actual word starting here: %s\n, item); */
-	while ( *item  *item != ','  *item != ' '  *item != ')'  *item != ';' ) {
+	while ( *item  *item != ','  *item != ' '  *item != '\t'
+   *item != ')'  *item != ';' ) {
 	  item++;
 	}
 	char word[item-start+1];
@@ -1256,11 +1279,11 @@
 /* 	printf(not a table: %s\n, word_lower); */
 	// if we encounter join or as we set
 	// a flag because we know what to do next
-	if ( strcmp(nottables[7],word_lower) == 0 ) {
+	if ( strcmp(join,word_lower) == 0 ) {
 	  //printf(join found\n);
 	  join_flag = 1;
 	}
-	if ( strcmp(nottables[8],word_lower) == 0 ) {
+	if ( strcmp(as,word_lower) == 0 ) {
 	  //printf(as found\n);
 	  as_flag 

[libdbi-users] PATCH: sqlite3 parser bugs

2010-12-25 Thread Vikram Ambrose
I'm working on a patch for the sqlite3 driver to resolve some of the 
issues I've been having.


The first issue I had was that the parser was unable to find Tables in 
the SQL statement. This was because the parser assumed that the only 
possible white space around keywords was ' ' or 0x20;


eg. strstr(statement, FROM );

If you had SQL formatted like;

SELECT foo\nFROM bar;

or

SELECT foo\n\tFROM bar;

The parser would fail to find 'bar' because there isn't a ' ' character 
before FROM. This was a problem for all keywords. I think using strtok 
would have been a better approach than going char by char. But since the 
parser works in general, I didn't want to rewrite it all.


The second bug was that the parser would always read past the first 
endword if there was a JOIN just before it. Resulting in tables being 
added to the list that don't exist. (eg. Artist.name, not a table but a 
field). Thus in order to fix this you need to check for endwords right 
away if you're parsing through JOINs.


I'm still having all sorts of issues with the sqlite3 driver but these 
two things have at least got my application running :)


I also removed some magic references to nottables[] and added some 
comments to denote the end of large statements.


I ran make check with the patch and 380 tests passed, 0 failed.

Let me know what you think of the patch.

On a side note, write performance to sqlite3 db is incredibly poor (1s 
for each INSERT regardless of content or no. of fields). But read 
performance is great. Any tips on how to tune sqlite3?



Vikram.

Index: libdbi-drivers/drivers/sqlite3/dbd_sqlite3.c
===
--- libdbi-drivers.orig/drivers/sqlite3/dbd_sqlite3.c	2010-12-25 20:11:38.603387002 -0500
+++ libdbi-drivers/drivers/sqlite3/dbd_sqlite3.c	2010-12-25 20:12:05.273387001 -0500
@@ -771,10 +771,11 @@
   //printf(curr_table before getTables = %s\n,curr_table);
   table_count = getTables(tables,0,statement_copy,curr_table);
   //printf(curr_table after getTables = %s\n,curr_table);
-  //printf(*TABLELIST\n);
-  //		for ( counter = 0 ; counter  table_count ; counter++) {
-  //			//printf(%s\n,tables[counter]);
-  //		}
+  //printf(%s\n,statement_copy);
+  printf(*TABLELIST\n);
+  for ( counter = 0 ; counter  table_count ; counter++) {
+ printf([%i] %s\n,counter,tables[counter]);
+  }
 
   // resolve our curr_field to a real column
   char* token;
@@ -1180,6 +1181,27 @@
   return type;
 }
 
+/* Similar to C99 strstr() except ensures that there is
+ * white space around needle. */
+char *strstr_ws(const char *haystack, const char *needle){
+   const char *c = NULL;
+   int len;
+
+   len = strlen(needle);
+
+   c = haystack;
+
+   while( (c = strstr(c,needle)) != NULL){
+  if(c == haystack) return NULL;
+  if( ( *(c-1)  == ' ' || *(c-1) == '\t' || *(c-1) == '\n')
+ 
+  ( c[len] == ' '  || c[len] == '\t' || c[len] == '\n'))
+   return (char*) c;
+   }
+
+   return NULL;
+}
+
 int getTables(char** tables, int index, const char* statement, char* curr_table) {
   //printf(getTables\n);
 /*   printf(processing %s\n,statement); */
@@ -1195,16 +1217,16 @@
   char* endwords[] = {where,group,having,union,intersect,except,order,limit};
   char* nottables[] = {natural,left,right,full,outer,inner,cross,join,as,on};
 
-  if ( !(item = strstr(statement,  from )) ) {
-if ( !(item = strstr(statement,  FROM )) )
+  if ( !(item = strstr_ws(statement, from)) ) {
+if ( !(item = strstr_ws(statement, FROM)) )
 /*   printf(no from clause\n); */
   return index;
   }
-  item += 6;
+  item += strlen(from);
 
   while ( *item != '\0' ) {
 /* printf(begin parsing at %s\n, item); */
-if ( *item == ' ' || *item == ',' ) {
+if ( *item == ' '|| *item == '\t' || *item == ',' ) {
   item++;
 }
 else {
@@ -1229,7 +1251,8 @@
   }
   else {
 /* 	printf(actual word starting here: %s\n, item); */
-	while ( *item  *item != ','  *item != ' '  *item != ')'  *item != ';' ) {
+	while ( *item  *item != ','  *item != ' '  *item != '\t'
+   *item != ')'  *item != ';' ) {
 	  item++;
 	}
 	char word[item-start+1];
@@ -1256,11 +1279,11 @@
 /* 	printf(not a table: %s\n, word_lower); */
 	// if we encounter join or as we set
 	// a flag because we know what to do next
-	if ( strcmp(nottables[7],word_lower) == 0 ) {
+	if ( strcmp(join,word_lower) == 0 ) {
 	  //printf(join found\n);
 	  join_flag = 1;
 	}
-	if ( strcmp(nottables[8],word_lower) == 0 ) {
+	if ( strcmp(as,word_lower) == 0 ) {
 	  //printf(as found\n);
 	  as_flag = 1;
 	}
@@ -1296,7 +1319,7 @@
 	  join_flag = 0;
 	  int skip_flag = 1;
 	  while ( skip_flag == 1 ) {
-		if ( *item == ' ') {
+		if ( *item == ' '|| *item == '\t' ) {
 		  item++;
 		}
 		else {
@@ -1313,7 +1336,8 @@
 			opens--;
 		}
 		  }
-		  while (