[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage

2015-09-17 Thread Ralf Junker
On 17.09.2015 20:14, Scott Hess wrote:

> The problem is that there are LOCALE settings where tolower() does things C
> programmers don't expect.  I think tr_TR was one case, the handling of 'I'
> (Google "tr_tr locale bug" and you'll see lots of people hitting the same
> general problem).  It isn't a problem of type safety, it's a problem that
> the same inputs might have different outputs for certain library functions
> when you change environment variables.  I don't remember whether there were
> specific problems with other ctype functions, or if I just thought it was a
> good idea to be careful, once I realized the class of problem.

And this check-in therefore misses the point as it does not address this 
LOCALE problem IMHO:

http://www.sqlite.org/src/info/6713e35b8a8c997a

Ralf


[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage

2015-09-17 Thread Jan Nijtmans
2015-08-26 19:03 GMT+02:00 Ralf Junker :
> ext/misc/json1.c uses the following functions from the C library:
>
> isalnum(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=564
> isspace(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=635
> isdigit(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=829

> Shouldn't json1.c avoid them for the same reasons?

Simpler is: cast the argument to (unsigned char), that
has the same effect (but is more efficient).

Proposed patch below.

Thanks!

Regards,
  Jan Nijtmans
==
--- ext/misc/json1.c
+++ ext/misc/json1.c
@@ -583,18 +583,18 @@
   char c;
   u32 j;
   int iThis;
   int x;
   JsonNode *pNode;
-  while( isspace(pParse->zJson[i]) ){ i++; }
+  while( isspace((unsigned char)pParse->zJson[i]) ){ i++; }
   if( (c = pParse->zJson[i])==0 ) return 0;
   if( c=='{' ){
 /* Parse object */
 iThis = jsonParseAddNode(pParse, JSON_OBJECT, 0, 0);
 if( iThis<0 ) return -1;
 for(j=i+1;;j++){
-  while( isspace(pParse->zJson[j]) ){ j++; }
+  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
   x = jsonParseValue(pParse, j);
   if( x<0 ){
 if( x==(-2) && pParse->nNode==(u32)iThis+1 ) return j+1;
 return -1;
   }
@@ -601,17 +601,17 @@
   if( pParse->oom ) return -1;
   pNode = &pParse->aNode[pParse->nNode-1];
   if( pNode->eType!=JSON_STRING ) return -1;
   pNode->jnFlags |= JNODE_LABEL;
   j = x;
-  while( isspace(pParse->zJson[j]) ){ j++; }
+  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
   if( pParse->zJson[j]!=':' ) return -1;
   j++;
   x = jsonParseValue(pParse, j);
   if( x<0 ) return -1;
   j = x;
-  while( isspace(pParse->zJson[j]) ){ j++; }
+  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
   c = pParse->zJson[j];
   if( c==',' ) continue;
   if( c!='}' ) return -1;
   break;
 }
@@ -620,18 +620,18 @@
   }else if( c=='[' ){
 /* Parse array */
 iThis = jsonParseAddNode(pParse, JSON_ARRAY, 0, 0);
 if( iThis<0 ) return -1;
 for(j=i+1;;j++){
-  while( isspace(pParse->zJson[j]) ){ j++; }
+  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
   x = jsonParseValue(pParse, j);
   if( x<0 ){
 if( x==(-3) && pParse->nNode==(u32)iThis+1 ) return j+1;
 return -1;
   }
   j = x;
-  while( isspace(pParse->zJson[j]) ){ j++; }
+  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
   c = pParse->zJson[j];
   if( c==',' ) continue;
   if( c!=']' ) return -1;
   break;
 }
@@ -656,21 +656,21 @@
 jsonParseAddNode(pParse, JSON_STRING, j+1-i, &pParse->zJson[i]);
 if( !pParse->oom ) pParse->aNode[pParse->nNode-1].jnFlags = jnFlags;
 return j+1;
   }else if( c=='n'
  && strncmp(pParse->zJson+i,"null",4)==0
- && !isalnum(pParse->zJson[i+4]) ){
+ && !isalnum((unsigned char)pParse->zJson[i+4]) ){
 jsonParseAddNode(pParse, JSON_NULL, 0, 0);
 return i+4;
   }else if( c=='t'
  && strncmp(pParse->zJson+i,"true",4)==0
- && !isalnum(pParse->zJson[i+4]) ){
+ && !isalnum((unsigned char)pParse->zJson[i+4]) ){
 jsonParseAddNode(pParse, JSON_TRUE, 0, 0);
 return i+4;
   }else if( c=='f'
  && strncmp(pParse->zJson+i,"false",5)==0
- && !isalnum(pParse->zJson[i+5]) ){
+ && !isalnum((unsigned char)pParse->zJson[i+5]) ){
 jsonParseAddNode(pParse, JSON_FALSE, 0, 0);
 return i+5;
   }else if( c=='-' || (c>='0' && c<='9') ){
 /* Parse number */
 u8 seenDP = 0;
@@ -729,11 +729,11 @@
   if( zJson==0 ) return 1;
   pParse->zJson = zJson;
   i = jsonParseValue(pParse, 0);
   if( pParse->oom ) i = -1;
   if( i>0 ){
-while( isspace(zJson[i]) ) i++;
+while( isspace((unsigned char)zJson[i]) ) i++;
 if( zJson[i] ) i = -1;
   }
   if( i<=0 ){
 if( pCtx!=0 ){
   if( pParse->oom ){
@@ -860,15 +860,15 @@
 pRoot->jnFlags |= JNODE_APPEND;
 pParse->aNode[iLabel].jnFlags |= JNODE_RAW;
   }
   return pNode;
 }
-  }else if( zPath[0]=='[' && isdigit(zPath[1]) ){
+  }else if( zPath[0]=='[' && isdigit((unsigned char)zPath[1]) ){
 if( pRoot->eType!=JSON_ARRAY ) return 0;
 i = 0;
 zPath++;
-while( isdigit(zPath[0]) ){
+while( isdigit((unsigned char)zPath[0]) ){
   i = i*10 + zPath[0] - '0';
   zPath++;
 }
 if( zPath[0]!=']' ){
   *pzErr = zPath;


[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage

2015-09-17 Thread Scott Hess
On Thu, Sep 17, 2015 at 1:24 PM, Ralf Junker  wrote:

> On 17.09.2015 20:14, Scott Hess wrote:
>
>> The problem is that there are LOCALE settings where tolower() does things
>> C
>> programmers don't expect.  I think tr_TR was one case, the handling of 'I'
>> (Google "tr_tr locale bug" and you'll see lots of people hitting the same
>> general problem).  It isn't a problem of type safety, it's a problem that
>> the same inputs might have different outputs for certain library functions
>> when you change environment variables.  I don't remember whether there
>> were
>> specific problems with other ctype functions, or if I just thought it was
>> a
>> good idea to be careful, once I realized the class of problem.
>>
>
> And this check-in therefore misses the point as it does not address this
> LOCALE problem IMHO:
>
> http://www.sqlite.org/src/info/6713e35b8a8c997a


Hmm.  Well, it might miss _a_ point, while solidly landing some other point
:-).

Current fts3 seems to spread this code differently, under fts2.c it was
like:

https://code.google.com/p/chromium/codesearch#chromium/src/third_party/sqlite/src/ext/fts2/fts2.c&l=336
where it's not a wrapper around the library, instead it was a direct
implementation of the functions which only acted on 7-bit values in the
ASCII set.

I think these should really be in terms of sqlite3UpperToLower
and sqlite3CtypeMap.  That might be an issue to expose to an extension
sensibly.

-scott


[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage

2015-09-17 Thread Scott Hess
The problem is that there are LOCALE settings where tolower() does things C
programmers don't expect.  I think tr_TR was one case, the handling of 'I'
(Google "tr_tr locale bug" and you'll see lots of people hitting the same
general problem).  It isn't a problem of type safety, it's a problem that
the same inputs might have different outputs for certain library functions
when you change environment variables.  I don't remember whether there were
specific problems with other ctype functions, or if I just thought it was a
good idea to be careful, once I realized the class of problem.

[My run-in with this issue was in development of fts2.c/fts3.c.  I'm sure
the same general series of events leads to similar local implementations in
other projects :-).]

-scott


On Thu, Sep 17, 2015 at 7:03 AM, Jan Nijtmans 
wrote:

> 2015-08-26 19:03 GMT+02:00 Ralf Junker :
> > ext/misc/json1.c uses the following functions from the C library:
> >
> > isalnum(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=564
> > isspace(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=635
> > isdigit(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=829
>
> > Shouldn't json1.c avoid them for the same reasons?
>
> Simpler is: cast the argument to (unsigned char), that
> has the same effect (but is more efficient).
>
> Proposed patch below.
>
> Thanks!
>
> Regards,
>   Jan Nijtmans
> ==
> --- ext/misc/json1.c
> +++ ext/misc/json1.c
> @@ -583,18 +583,18 @@
>char c;
>u32 j;
>int iThis;
>int x;
>JsonNode *pNode;
> -  while( isspace(pParse->zJson[i]) ){ i++; }
> +  while( isspace((unsigned char)pParse->zJson[i]) ){ i++; }
>if( (c = pParse->zJson[i])==0 ) return 0;
>if( c=='{' ){
>  /* Parse object */
>  iThis = jsonParseAddNode(pParse, JSON_OBJECT, 0, 0);
>  if( iThis<0 ) return -1;
>  for(j=i+1;;j++){
> -  while( isspace(pParse->zJson[j]) ){ j++; }
> +  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
>x = jsonParseValue(pParse, j);
>if( x<0 ){
>  if( x==(-2) && pParse->nNode==(u32)iThis+1 ) return j+1;
>  return -1;
>}
> @@ -601,17 +601,17 @@
>if( pParse->oom ) return -1;
>pNode = &pParse->aNode[pParse->nNode-1];
>if( pNode->eType!=JSON_STRING ) return -1;
>pNode->jnFlags |= JNODE_LABEL;
>j = x;
> -  while( isspace(pParse->zJson[j]) ){ j++; }
> +  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
>if( pParse->zJson[j]!=':' ) return -1;
>j++;
>x = jsonParseValue(pParse, j);
>if( x<0 ) return -1;
>j = x;
> -  while( isspace(pParse->zJson[j]) ){ j++; }
> +  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
>c = pParse->zJson[j];
>if( c==',' ) continue;
>if( c!='}' ) return -1;
>break;
>  }
> @@ -620,18 +620,18 @@
>}else if( c=='[' ){
>  /* Parse array */
>  iThis = jsonParseAddNode(pParse, JSON_ARRAY, 0, 0);
>  if( iThis<0 ) return -1;
>  for(j=i+1;;j++){
> -  while( isspace(pParse->zJson[j]) ){ j++; }
> +  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
>x = jsonParseValue(pParse, j);
>if( x<0 ){
>  if( x==(-3) && pParse->nNode==(u32)iThis+1 ) return j+1;
>  return -1;
>}
>j = x;
> -  while( isspace(pParse->zJson[j]) ){ j++; }
> +  while( isspace((unsigned char)pParse->zJson[j]) ){ j++; }
>c = pParse->zJson[j];
>if( c==',' ) continue;
>if( c!=']' ) return -1;
>break;
>  }
> @@ -656,21 +656,21 @@
>  jsonParseAddNode(pParse, JSON_STRING, j+1-i, &pParse->zJson[i]);
>  if( !pParse->oom ) pParse->aNode[pParse->nNode-1].jnFlags = jnFlags;
>  return j+1;
>}else if( c=='n'
>   && strncmp(pParse->zJson+i,"null",4)==0
> - && !isalnum(pParse->zJson[i+4]) ){
> + && !isalnum((unsigned char)pParse->zJson[i+4]) ){
>  jsonParseAddNode(pParse, JSON_NULL, 0, 0);
>  return i+4;
>}else if( c=='t'
>   && strncmp(pParse->zJson+i,"true",4)==0
> - && !isalnum(pParse->zJson[i+4]) ){
> + && !isalnum((unsigned char)pParse->zJson[i+4]) ){
>  jsonParseAddNode(pParse, JSON_TRUE, 0, 0);
>  return i+4;
>}else if( c=='f'
>   && strncmp(pParse->zJson+i,"false",5)==0
> - && !isalnum(pParse->zJson[i+5]) ){
> + && !isalnum((unsigned char)pParse->zJson[i+5]) ){
>  jsonParseAddNode(pParse, JSON_FALSE, 0, 0);
>  return i+5;
>}else if( c=='-' || (c>='0' && c<='9') ){
>  /* Parse number */
>  u8 seenDP = 0;
> @@ -729,11 +729,11 @@
>if( zJson==0 ) return 1;
>pParse->zJson = zJson;
>i = jsonParseValue(pParse, 0);
>if( pParse->oom ) i = -1;
>if( i>0 ){
> -while( isspace(zJson[i]) ) i++;
> +while( isspace((unsigned char)zJson[i]) ) i++;
>  if( zJson[i] ) i = -1;
>}
>if( i<

[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage

2015-08-26 Thread Ralf Junker
ext/misc/json1.c uses the following functions from the C library:

isalnum(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=564
isspace(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=635
isdigit(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=829

Existing source code declares these as unsafe for hi-bit-set characters 
and introduces safe replacement versions independent of locale:

src/sqliteInt.h:
https://www.sqlite.org/src/artifact/424a2020efc9736c?ln=3092-3094

ext/fts2/fts2.c:
https://www.sqlite.org/src/artifact/72c816a9ae448049?ln=336-353

ext/fts3/fts3_tokenizer1.c:
https://www.sqlite.org/src/artifact/5c98225a53705e5e?ln=54-56

Shouldn't json1.c avoid them for the same reasons?

Ralf