[PATCH] are characters unsigned?

Nicholas Clark Mon, 21 Jan 2002 14:20:02 -0800

This warning:

string.c: In function `string_transcode':
string.c:194: warning: passing arg 2 of pointer to function as unsigned due to prototype


represents a can of worms. The summary is "are characters signed or unsigned?"

I am of the opinion that they are UINTVAL, not INTVAL. (EOF being a
negative value such as -1 is only needed for C stdio, and I seem to remember
that Dan has strong opinions on C stdio and what C can do with it.)

This is not a very considered opinion, I should add. It just feels safer with
them as unsigned, on the assumption that our code doesn't do EOF.

In which case, the following rather involved patch is needed. Or something
similar. And it's scary because it redefines chartypes, so please could
someone sanity check it.

I thought that it should be this

    INTVAL (*get_digit)(UINTVAL c);

not this

    UINTVAL (*get_digit)(UINTVAL c);

as I'd not be surprised if Unicode contains a glyph in some script that
represents a digit with a negative value. (And if there isn't, the Klingons
will invent one to be awkward.)

Nicholas Clark
-- 
ENOCHOCOLATE http://www.ccl4.org/~nick/CV.html

--- include/parrot/chartype.h~  Thu Dec 27 18:50:28 2001
+++ include/parrot/chartype.h   Mon Jan 21 19:12:16 2002
@@ -13,15 +13,15 @@
 #if !defined(PARROT_CHARTYPE_H_GUARD)
 #define PARROT_ENCODING_H_GUARD
 
-typedef INTVAL (*CHARTYPE_TRANSCODER)(INTVAL c);
+typedef UINTVAL (*CHARTYPE_TRANSCODER)(UINTVAL c);
 
 typedef struct {
     const char *name;
     const char *default_encoding;
     CHARTYPE_TRANSCODER (*transcode_from)(const char *from);
     CHARTYPE_TRANSCODER (*transcode_to)(const char *to);
-    BOOLVAL (*is_digit)(INTVAL c);
-    INTVAL (*get_digit)(INTVAL c);
+    BOOLVAL (*is_digit)(UINTVAL c);
+    INTVAL (*get_digit)(UINTVAL c);
 } CHARTYPE;
 
 const CHARTYPE *
--- ../parrot/string.c  Tue Jan 15 23:14:51 2002
+++ string.c    Mon Jan 21 19:28:24 2002
@@ -186,7 +186,7 @@
     destend = deststart;
 
     while (srcstart < srcend) {
-        INTVAL c = src->encoding->decode(srcstart);
+        UINTVAL c = src->encoding->decode(srcstart);
 
         if (transcoder1) c = transcoder1(c);
         if (transcoder2) c = transcoder2(c);
@@ -424,7 +424,7 @@
     }
 
     if (len == 1) {
-        INTVAL c = s->encoding->decode(s->bufstart);
+        UINTVAL c = s->encoding->decode(s->bufstart);
         if (s->type->is_digit(c) && s->type->get_digit(c) == 0) {
             return 0;
         }
@@ -456,7 +456,7 @@
         BOOLVAL in_number = 0;
 
         while (start < end) {
-            INTVAL c = s->encoding->decode(start);
+            UINTVAL c = s->encoding->decode(start);
 
             if (s->type->is_digit(c)) {
                 in_number = 1;
@@ -500,7 +500,7 @@
         INTVAL fake_exponent = 0;
 
         while (start < end) {
-            INTVAL c = s->encoding->decode(start);
+            UINTVAL c = s->encoding->decode(start);
 
             if (s->type->is_digit(c)) {
                 if (in_exp) {
--- ../parrot/chartypes/unicode.c       Tue Jan 15 20:02:54 2002
+++ chartypes/unicode.c Mon Jan 21 20:06:09 2002
@@ -23,12 +23,12 @@
 }
 
 static BOOLVAL
-unicode_is_digit(INTVAL c) {
+unicode_is_digit(UINTVAL c) {
     return (BOOLVAL)(isdigit(c) ? 1 : 0); /* FIXME - Other code points are also digits */
 }
 
-static INTVAL
-unicode_get_digit(INTVAL c) {
+static UINTVAL
+unicode_get_digit(UINTVAL c) {
     return c - '0'; /* FIXME - many more digits than this... */
 }
 
--- ../parrot/chartypes/usascii.c       Tue Jan 15 20:02:54 2002
+++ chartypes/usascii.c Mon Jan 21 20:10:49 2002
@@ -12,9 +12,9 @@
 
 #include "parrot/parrot.h"
 
-static INTVAL
-usascii_transcode_from_unicode(INTVAL c) {
-    if (c < 0 || c > 127) {
+static UINTVAL
+usascii_transcode_from_unicode(UINTVAL c) {
+    if (c > 127) {
         internal_exception(INVALID_CHARACTER, "Invalid character for US-ASCII");
     }
     return c;
@@ -30,8 +30,8 @@
     }
 }
 
-static INTVAL
-usascii_transcode_to_unicode(INTVAL c) {
+static UINTVAL
+usascii_transcode_to_unicode(UINTVAL c) {
     return c;
 }
 
@@ -46,13 +46,13 @@
 }
 
 static BOOLVAL
-usascii_is_digit(INTVAL c) {
-    return (BOOLVAL)(isdigit(c) ? 1 : 0);
+usascii_is_digit(UINTVAL c) {
+    return (BOOLVAL)(isdigit((int) c) ? 1 : 0);
 }
 
 static INTVAL
-usascii_get_digit(INTVAL c) {
-    return c - '0';
+usascii_get_digit(UINTVAL c) {
+    return ((INTVAL) c) - '0';
 }
 
 const CHARTYPE usascii_chartype = {

[PATCH] are characters unsigned?

Reply via email to