[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage
On 17.09.2015 20:14, Scott Hess wrote: > The problem is that there are LOCALE settings where tolower() does things C > programmers don't expect. I think tr_TR was one case, the handling of 'I' > (Google "tr_tr locale bug" and you'll see lots of people hitting the same > general problem). It isn't a problem of type safety, it's a problem that > the same inputs might have different outputs for certain library functions > when you change environment variables. I don't remember whether there were > specific problems with other ctype functions, or if I just thought it was a > good idea to be careful, once I realized the class of problem. And this check-in therefore misses the point as it does not address this LOCALE problem IMHO: http://www.sqlite.org/src/info/6713e35b8a8c997a Ralf
[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage
2015-08-26 19:03 GMT+02:00 Ralf Junker : > ext/misc/json1.c uses the following functions from the C library: > > isalnum(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=564 > isspace(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=635 > isdigit(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=829 > Shouldn't json1.c avoid them for the same reasons? Simpler is: cast the argument to (unsigned char), that has the same effect (but is more efficient). Proposed patch below. Thanks! Regards, Jan Nijtmans == --- ext/misc/json1.c +++ ext/misc/json1.c @@ -583,18 +583,18 @@ char c; u32 j; int iThis; int x; JsonNode *pNode; - while( isspace(pParse->zJson[i]) ){ i++; } + while( isspace((unsigned char)pParse->zJson[i]) ){ i++; } if( (c = pParse->zJson[i])==0 ) return 0; if( c=='{' ){ /* Parse object */ iThis = jsonParseAddNode(pParse, JSON_OBJECT, 0, 0); if( iThis<0 ) return -1; for(j=i+1;;j++){ - while( isspace(pParse->zJson[j]) ){ j++; } + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } x = jsonParseValue(pParse, j); if( x<0 ){ if( x==(-2) && pParse->nNode==(u32)iThis+1 ) return j+1; return -1; } @@ -601,17 +601,17 @@ if( pParse->oom ) return -1; pNode = &pParse->aNode[pParse->nNode-1]; if( pNode->eType!=JSON_STRING ) return -1; pNode->jnFlags |= JNODE_LABEL; j = x; - while( isspace(pParse->zJson[j]) ){ j++; } + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } if( pParse->zJson[j]!=':' ) return -1; j++; x = jsonParseValue(pParse, j); if( x<0 ) return -1; j = x; - while( isspace(pParse->zJson[j]) ){ j++; } + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } c = pParse->zJson[j]; if( c==',' ) continue; if( c!='}' ) return -1; break; } @@ -620,18 +620,18 @@ }else if( c=='[' ){ /* Parse array */ iThis = jsonParseAddNode(pParse, JSON_ARRAY, 0, 0); if( iThis<0 ) return -1; for(j=i+1;;j++){ - while( isspace(pParse->zJson[j]) ){ j++; } + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } x = jsonParseValue(pParse, j); if( x<0 ){ if( x==(-3) && pParse->nNode==(u32)iThis+1 ) return j+1; return -1; } j = x; - while( isspace(pParse->zJson[j]) ){ j++; } + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } c = pParse->zJson[j]; if( c==',' ) continue; if( c!=']' ) return -1; break; } @@ -656,21 +656,21 @@ jsonParseAddNode(pParse, JSON_STRING, j+1-i, &pParse->zJson[i]); if( !pParse->oom ) pParse->aNode[pParse->nNode-1].jnFlags = jnFlags; return j+1; }else if( c=='n' && strncmp(pParse->zJson+i,"null",4)==0 - && !isalnum(pParse->zJson[i+4]) ){ + && !isalnum((unsigned char)pParse->zJson[i+4]) ){ jsonParseAddNode(pParse, JSON_NULL, 0, 0); return i+4; }else if( c=='t' && strncmp(pParse->zJson+i,"true",4)==0 - && !isalnum(pParse->zJson[i+4]) ){ + && !isalnum((unsigned char)pParse->zJson[i+4]) ){ jsonParseAddNode(pParse, JSON_TRUE, 0, 0); return i+4; }else if( c=='f' && strncmp(pParse->zJson+i,"false",5)==0 - && !isalnum(pParse->zJson[i+5]) ){ + && !isalnum((unsigned char)pParse->zJson[i+5]) ){ jsonParseAddNode(pParse, JSON_FALSE, 0, 0); return i+5; }else if( c=='-' || (c>='0' && c<='9') ){ /* Parse number */ u8 seenDP = 0; @@ -729,11 +729,11 @@ if( zJson==0 ) return 1; pParse->zJson = zJson; i = jsonParseValue(pParse, 0); if( pParse->oom ) i = -1; if( i>0 ){ -while( isspace(zJson[i]) ) i++; +while( isspace((unsigned char)zJson[i]) ) i++; if( zJson[i] ) i = -1; } if( i<=0 ){ if( pCtx!=0 ){ if( pParse->oom ){ @@ -860,15 +860,15 @@ pRoot->jnFlags |= JNODE_APPEND; pParse->aNode[iLabel].jnFlags |= JNODE_RAW; } return pNode; } - }else if( zPath[0]=='[' && isdigit(zPath[1]) ){ + }else if( zPath[0]=='[' && isdigit((unsigned char)zPath[1]) ){ if( pRoot->eType!=JSON_ARRAY ) return 0; i = 0; zPath++; -while( isdigit(zPath[0]) ){ +while( isdigit((unsigned char)zPath[0]) ){ i = i*10 + zPath[0] - '0'; zPath++; } if( zPath[0]!=']' ){ *pzErr = zPath;
[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage
On Thu, Sep 17, 2015 at 1:24 PM, Ralf Junker wrote: > On 17.09.2015 20:14, Scott Hess wrote: > >> The problem is that there are LOCALE settings where tolower() does things >> C >> programmers don't expect. I think tr_TR was one case, the handling of 'I' >> (Google "tr_tr locale bug" and you'll see lots of people hitting the same >> general problem). It isn't a problem of type safety, it's a problem that >> the same inputs might have different outputs for certain library functions >> when you change environment variables. I don't remember whether there >> were >> specific problems with other ctype functions, or if I just thought it was >> a >> good idea to be careful, once I realized the class of problem. >> > > And this check-in therefore misses the point as it does not address this > LOCALE problem IMHO: > > http://www.sqlite.org/src/info/6713e35b8a8c997a Hmm. Well, it might miss _a_ point, while solidly landing some other point :-). Current fts3 seems to spread this code differently, under fts2.c it was like: https://code.google.com/p/chromium/codesearch#chromium/src/third_party/sqlite/src/ext/fts2/fts2.c&l=336 where it's not a wrapper around the library, instead it was a direct implementation of the functions which only acted on 7-bit values in the ASCII set. I think these should really be in terms of sqlite3UpperToLower and sqlite3CtypeMap. That might be an issue to expose to an extension sensibly. -scott
[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage
The problem is that there are LOCALE settings where tolower() does things C programmers don't expect. I think tr_TR was one case, the handling of 'I' (Google "tr_tr locale bug" and you'll see lots of people hitting the same general problem). It isn't a problem of type safety, it's a problem that the same inputs might have different outputs for certain library functions when you change environment variables. I don't remember whether there were specific problems with other ctype functions, or if I just thought it was a good idea to be careful, once I realized the class of problem. [My run-in with this issue was in development of fts2.c/fts3.c. I'm sure the same general series of events leads to similar local implementations in other projects :-).] -scott On Thu, Sep 17, 2015 at 7:03 AM, Jan Nijtmans wrote: > 2015-08-26 19:03 GMT+02:00 Ralf Junker : > > ext/misc/json1.c uses the following functions from the C library: > > > > isalnum(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=564 > > isspace(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=635 > > isdigit(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=829 > > > Shouldn't json1.c avoid them for the same reasons? > > Simpler is: cast the argument to (unsigned char), that > has the same effect (but is more efficient). > > Proposed patch below. > > Thanks! > > Regards, > Jan Nijtmans > == > --- ext/misc/json1.c > +++ ext/misc/json1.c > @@ -583,18 +583,18 @@ >char c; >u32 j; >int iThis; >int x; >JsonNode *pNode; > - while( isspace(pParse->zJson[i]) ){ i++; } > + while( isspace((unsigned char)pParse->zJson[i]) ){ i++; } >if( (c = pParse->zJson[i])==0 ) return 0; >if( c=='{' ){ > /* Parse object */ > iThis = jsonParseAddNode(pParse, JSON_OBJECT, 0, 0); > if( iThis<0 ) return -1; > for(j=i+1;;j++){ > - while( isspace(pParse->zJson[j]) ){ j++; } > + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } >x = jsonParseValue(pParse, j); >if( x<0 ){ > if( x==(-2) && pParse->nNode==(u32)iThis+1 ) return j+1; > return -1; >} > @@ -601,17 +601,17 @@ >if( pParse->oom ) return -1; >pNode = &pParse->aNode[pParse->nNode-1]; >if( pNode->eType!=JSON_STRING ) return -1; >pNode->jnFlags |= JNODE_LABEL; >j = x; > - while( isspace(pParse->zJson[j]) ){ j++; } > + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } >if( pParse->zJson[j]!=':' ) return -1; >j++; >x = jsonParseValue(pParse, j); >if( x<0 ) return -1; >j = x; > - while( isspace(pParse->zJson[j]) ){ j++; } > + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } >c = pParse->zJson[j]; >if( c==',' ) continue; >if( c!='}' ) return -1; >break; > } > @@ -620,18 +620,18 @@ >}else if( c=='[' ){ > /* Parse array */ > iThis = jsonParseAddNode(pParse, JSON_ARRAY, 0, 0); > if( iThis<0 ) return -1; > for(j=i+1;;j++){ > - while( isspace(pParse->zJson[j]) ){ j++; } > + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } >x = jsonParseValue(pParse, j); >if( x<0 ){ > if( x==(-3) && pParse->nNode==(u32)iThis+1 ) return j+1; > return -1; >} >j = x; > - while( isspace(pParse->zJson[j]) ){ j++; } > + while( isspace((unsigned char)pParse->zJson[j]) ){ j++; } >c = pParse->zJson[j]; >if( c==',' ) continue; >if( c!=']' ) return -1; >break; > } > @@ -656,21 +656,21 @@ > jsonParseAddNode(pParse, JSON_STRING, j+1-i, &pParse->zJson[i]); > if( !pParse->oom ) pParse->aNode[pParse->nNode-1].jnFlags = jnFlags; > return j+1; >}else if( c=='n' > && strncmp(pParse->zJson+i,"null",4)==0 > - && !isalnum(pParse->zJson[i+4]) ){ > + && !isalnum((unsigned char)pParse->zJson[i+4]) ){ > jsonParseAddNode(pParse, JSON_NULL, 0, 0); > return i+4; >}else if( c=='t' > && strncmp(pParse->zJson+i,"true",4)==0 > - && !isalnum(pParse->zJson[i+4]) ){ > + && !isalnum((unsigned char)pParse->zJson[i+4]) ){ > jsonParseAddNode(pParse, JSON_TRUE, 0, 0); > return i+4; >}else if( c=='f' > && strncmp(pParse->zJson+i,"false",5)==0 > - && !isalnum(pParse->zJson[i+5]) ){ > + && !isalnum((unsigned char)pParse->zJson[i+5]) ){ > jsonParseAddNode(pParse, JSON_FALSE, 0, 0); > return i+5; >}else if( c=='-' || (c>='0' && c<='9') ){ > /* Parse number */ > u8 seenDP = 0; > @@ -729,11 +729,11 @@ >if( zJson==0 ) return 1; >pParse->zJson = zJson; >i = jsonParseValue(pParse, 0); >if( pParse->oom ) i = -1; >if( i>0 ){ > -while( isspace(zJson[i]) ) i++; > +while( isspace((unsigned char)zJson[i]) ) i++; > if( zJson[i] ) i = -1; >} >if( i<
[sqlite] json1.c: isalnum(), isspace(), and isdigit() usage
ext/misc/json1.c uses the following functions from the C library: isalnum(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=564 isspace(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=635 isdigit(): http://www.sqlite.org/src/artifact/541004e47235cefc?ln=829 Existing source code declares these as unsafe for hi-bit-set characters and introduces safe replacement versions independent of locale: src/sqliteInt.h: https://www.sqlite.org/src/artifact/424a2020efc9736c?ln=3092-3094 ext/fts2/fts2.c: https://www.sqlite.org/src/artifact/72c816a9ae448049?ln=336-353 ext/fts3/fts3_tokenizer1.c: https://www.sqlite.org/src/artifact/5c98225a53705e5e?ln=54-56 Shouldn't json1.c avoid them for the same reasons? Ralf