This warning: string.c: In function `string_transcode': string.c:194: warning: passing arg 2 of pointer to function as unsigned due to prototype
represents a can of worms. The summary is "are characters signed or unsigned?" I am of the opinion that they are UINTVAL, not INTVAL (EOF being a negative value such as -1 is only needed for C stdio, and I seem to remember that Dan has strong opinions on C stdio and what C can do with it). This is not a very considered opinion, I should add; it just feels safer with them as unsigned, on the assumption that our code doesn't do EOF. In that case, the following rather involved patch, or something similar, is needed. It's scary because it redefines chartypes, so please could someone sanity-check it. I thought that the declaration should be this: INTVAL (*get_digit)(UINTVAL c); and not this: UINTVAL (*get_digit)(UINTVAL c); as I'd not be surprised if Unicode contains a glyph in some script that represents a digit with a negative value. (And if there isn't, the Klingons will invent one to be awkward.) Nicholas Clark -- ENOCHOCOLATE http://www.ccl4.org/~nick/CV.html --- include/parrot/chartype.h~ Thu Dec 27 18:50:28 2001 +++ include/parrot/chartype.h Mon Jan 21 19:12:16 2002 @@ -13,15 +13,15 @@ #if !defined(PARROT_CHARTYPE_H_GUARD) #define PARROT_ENCODING_H_GUARD -typedef INTVAL (*CHARTYPE_TRANSCODER)(INTVAL c); +typedef UINTVAL (*CHARTYPE_TRANSCODER)(UINTVAL c); typedef struct { const char *name; const char *default_encoding; CHARTYPE_TRANSCODER (*transcode_from)(const char *from); CHARTYPE_TRANSCODER (*transcode_to)(const char *to); - BOOLVAL (*is_digit)(INTVAL c); - INTVAL (*get_digit)(INTVAL c); + BOOLVAL (*is_digit)(UINTVAL c); + INTVAL (*get_digit)(UINTVAL c); } CHARTYPE; const CHARTYPE * --- ../parrot/string.c Tue Jan 15 23:14:51 2002 +++ string.c Mon Jan 21 19:28:24 2002 @@ -186,7 +186,7 @@ destend = deststart; while (srcstart < srcend) { - INTVAL c = src->encoding->decode(srcstart); + UINTVAL c = src->encoding->decode(srcstart); if (transcoder1) c = transcoder1(c); if (transcoder2) c = transcoder2(c); @@ -424,7 +424,7 @@ } if (len == 1) { - INTVAL c = 
s->encoding->decode(s->bufstart); + UINTVAL c = s->encoding->decode(s->bufstart); if (s->type->is_digit(c) && s->type->get_digit(c) == 0) { return 0; } @@ -456,7 +456,7 @@ BOOLVAL in_number = 0; while (start < end) { - INTVAL c = s->encoding->decode(start); + UINTVAL c = s->encoding->decode(start); if (s->type->is_digit(c)) { in_number = 1; @@ -500,7 +500,7 @@ INTVAL fake_exponent = 0; while (start < end) { - INTVAL c = s->encoding->decode(start); + UINTVAL c = s->encoding->decode(start); if (s->type->is_digit(c)) { if (in_exp) { --- ../parrot/chartypes/unicode.c Tue Jan 15 20:02:54 2002 +++ chartypes/unicode.c Mon Jan 21 20:06:09 2002 @@ -23,12 +23,12 @@ } static BOOLVAL -unicode_is_digit(INTVAL c) { +unicode_is_digit(UINTVAL c) { return (BOOLVAL)(isdigit(c) ? 1 : 0); /* FIXME - Other code points are also digits */ } -static INTVAL -unicode_get_digit(INTVAL c) { +static UINTVAL +unicode_get_digit(UINTVAL c) { return c - '0'; /* FIXME - many more digits than this... */ } --- ../parrot/chartypes/usascii.c Tue Jan 15 20:02:54 2002 +++ chartypes/usascii.c Mon Jan 21 20:10:49 2002 @@ -12,9 +12,9 @@ #include "parrot/parrot.h" -static INTVAL -usascii_transcode_from_unicode(INTVAL c) { - if (c < 0 || c > 127) { +static UINTVAL +usascii_transcode_from_unicode(UINTVAL c) { + if (c > 127) { internal_exception(INVALID_CHARACTER, "Invalid character for US-ASCII"); } return c; @@ -30,8 +30,8 @@ } } -static INTVAL -usascii_transcode_to_unicode(INTVAL c) { +static UINTVAL +usascii_transcode_to_unicode(UINTVAL c) { return c; } @@ -46,13 +46,13 @@ } static BOOLVAL -usascii_is_digit(INTVAL c) { - return (BOOLVAL)(isdigit(c) ? 1 : 0); +usascii_is_digit(UINTVAL c) { + return (BOOLVAL)(isdigit((int) c) ? 1 : 0); } static INTVAL -usascii_get_digit(INTVAL c) { - return c - '0'; +usascii_get_digit(UINTVAL c) { + return ((INTVAL) c) - '0'; } const CHARTYPE usascii_chartype = {