The patch below implements function 'ucodeToString' which
given unicode codepoint (Integer) produces string
containing corresponding character.  In case of non-Unicode
aware Lisp (like gcl) the string constains possibly multiple
octets encoded using UTF-8.

Moreover the patch changes Character and CharacterClass to
avoid (at least some) errors due to character codes beeing
larger than 255.  I also changed random in Character to
generate only ASCII -- while natural expectation is that
random should give uniform distribution, such expectation
is not reasonable for Unicode.  Namely some Unicode codepoints
are explicitly declared as illegal, currently most Unicode
codepoints is unassigned so they may be illegal is some
contexts.  And even legal characters may be unsupported on
given system.  Getting unsupported characters from 'random'
IMHO would be a bug, and the only characters which are
"surely" supported are ASCII...

With that patch I can do

char(945)

is sbcl and this gives expected effect.  Also I can do

alpha := ucodeToString(945)::Symbol

and get symbol with Unicode name in gcl.  Unfortunatly,
when using gcl (or other non-Unicode aware Lisp) display
on console may be misaligned: console display routines
count octets but using UTF-8 octet count is different
than character count...

Index: src/algebra/string.spad.pamphlet
===================================================================
--- src/algebra/string.spad.pamphlet    (revision 979)
+++ src/algebra/string.spad.pamphlet    (working copy)
@@ -70,7 +70,7 @@
             ++ i.e. one of 0..9, a..z or A..Z.
 
     == add
-        Rep := SingleInteger      -- 0..255
+        Rep := SingleInteger      -- 0..(1114112 - 1)
 
         CC ==> CharacterClass()
         import CC
@@ -79,23 +79,21 @@
         --    [ digit(), hexDigit(),
         --      upperCase(), lowerCase(), alphabetic(), alphanumeric() ]
 
-        OutChars:PrimitiveArray(OutputForm) :=
-           construct [NUM2CHAR(i)$Lisp for i in 0..255]
-
-        minChar := minIndex OutChars
-
         a = b                  == a =$Rep b
         a < b                  == a <$Rep b
-        size()                 == 256
+        -- size()                 == 256
+        size()                 == 1114112
         index n                == char((n - 1)::Integer)
         lookup c               == (1 + ord c)::PositiveInteger
         char(n:Integer)        == n::%
         ord c                  == convert(c)$Rep
-        random()               == char(random(size())$Integer)
+        -- random()               == char(random(size())$Integer)
+        -- FIXME: limit to ASCII for now
+        random()               == char(random(128)$Integer)
         space                  == QENUM("   ", 0$Lisp)$Lisp
         quote                  == QENUM("_" ", 0$Lisp)$Lisp
         escape                 == QENUM("__ ", 0$Lisp)$Lisp
-        coerce(c:%):OutputForm == OutChars(minChar + ord c)
+        coerce(c:%):OutputForm == NUM2USTR(ord c)$Lisp
         digit? c               == member?(c pretend Character, digit())
         hexDigit? c            == member?(c pretend Character, hexDigit())
         upperCase? c           == member?(c pretend Character, upperCase())
@@ -168,7 +166,8 @@
 
     == add
         Rep := IndexedBits(0)
-        N   := size()$Character
+        -- N   := size()$Character
+        N   := 256
 
         a, b: %
 
@@ -181,10 +180,14 @@
 
         a = b           == a =$Rep b
 
-        member?(c, a)   == a(ord c)
+        member?(c, a)   ==
+            (i := ord c) < N => a(i)
+            false
+
         union(a,b)      == Or(a, b)
         intersect (a,b) == And(a, b)
         difference(a,b) == And(a, Not b)
+        -- FIXME: this is bogus for codes >= N
         complement a    == Not a
 
         convert(cl):String ==
@@ -193,14 +196,18 @@
           [char(i) for i in 0..N-1 | cl.i]
 
         charClass(s: String) ==
-          cl := new(N, false)
-          for i in minIndex(s)..maxIndex(s) repeat cl(ord s.i) := true
-          cl
+            cl := new(N, false)
+            for i in minIndex(s)..maxIndex(s) repeat
+                (j := ord s.i) >= N => error "character code too large"
+                cl(j) := true
+            cl
 
         charClass(l: List Character) ==
-          cl := new(N, false)
-          for c in l repeat cl(ord c) := true
-          cl
+            cl := new(N, false)
+            for c in l repeat
+                (j := ord c) >= N => error "character code too large"
+                cl(j) := true
+            cl
 
         coerce(cl):OutputForm == (convert(cl)@String)::OutputForm
 
@@ -209,13 +216,19 @@
         empty():%       == charClass []
         brace():%       == charClass []
 
-        insert_!(c, a)  == (a(ord c) := true; a)
-        remove_!(c, a)  == (a(ord c) := false; a)
+        insert_!(c, a)  ==
+            (i := ord c) < N => (a(i) := true; a)
+            error "character code too large"
 
+        remove_!(c, a)  ==
+            if(i := ord c) < N then a(i) := false
+            a
+
         inspect(a) ==
             for i in 0..N-1 | a.i repeat
                  return char i
             error "Cannot take a character from an empty class."
+
         extract_!(a) ==
             for i in 0..N-1 | a.i repeat
                  a.i := false
@@ -259,6 +272,7 @@
   Export ==> StringAggregate() with
       hash: % -> I
         ++ hash(x) provides a hashing function for strings
+      -- ucodeToString : I -> %
 
   Implementation ==> add
     -- These assume Character's Rep is Small I
@@ -273,6 +287,7 @@
     c:  Character
     cc: CharacterClass
 
+    -- ucodeToString(n)       == NUM2USTR(n)$Lisp
 --  new n                  == MAKE_-FULL_-CVEC(n, space$C)$Lisp
     new(n, c)              == MAKE_-FULL_-CVEC(n, c)$Lisp
     empty()                == MAKE_-FULL_-CVEC(0$Lisp)$Lisp
@@ -451,7 +466,11 @@
 ++   This is the domain of character strings.
 MINSTRINGINDEX ==> 1          -- as of 3/14/90.
 
-String(): StringCategory == IndexedString(MINSTRINGINDEX) add
+String(): StringCategory with
+    ucodeToString : Integer -> %
+  == IndexedString(MINSTRINGINDEX) add
+
+    ucodeToString(n : Integer) : %       == NUM2USTR(n)$Lisp
     string n == STRINGIMAGE(n)$Lisp
 
     OMwrite(x: %): String ==
Index: src/interp/sys-pkg.lisp
===================================================================
--- src/interp/sys-pkg.lisp     (revision 979)
+++ src/interp/sys-pkg.lisp     (working copy)
@@ -307,6 +307,7 @@
    vmlisp::|mkOutputConsoleStream|
    vmlisp::|rMkIstream|
    vmlisp::|rMkOstream|
+   vmlisp::NUM2USTR
 ))
 
 (in-package "FOAM")
Index: src/interp/vmlisp.lisp
===================================================================
--- src/interp/vmlisp.lisp      (revision 982)
+++ src/interp/vmlisp.lisp      (working copy)
@@ -650,10 +650,45 @@
 (defun charp (a) (or (characterp a)
                      (and (identp a) (= (length (symbol-name a)) 1))))
 
+
 (defun NUM2CHAR (n) (code-char n))
 
 (defun CHAR2NUM (c) (char-code (character c)))
 
+#+(or :UNICODE :SB-UNICODE)
+(defun NUM2USTR (n) 
+    (make-string 1 :initial-element (NUM2CHAR n)))
+#-(or :UNICODE :SB-UNICODE)
+(defun NUM2USTR (n)
+   (let (k n1 n2 n3 n4 (l nil))
+       (cond
+           ((< n 128)
+               (setf k 1)
+               (setf l (list n)))
+           ((< n (ash 1 11))
+               (setf k 2)
+               (setf n1 (logior 128 (logand 63 n)))
+               (setf n2 (logior 192 (logand 31 (ash n -6))))
+               (setf l (list n2 n1)))
+           ((< n (ash 1 16))
+               (setf k 3)
+               (setf n1 (logior 128 (logand 63 n)))
+               (setf n2 (logior 128 (logand 63 (ash n -6))))
+               (setf n3 (logior 224 (logand 15 (ash n -12))))
+               (setf l (list n3 n2 n1)))
+           ((< n (ash 1 21))
+               (setf k 4)
+               (setf n1 (logior 128 (logand 63 n)))
+               (setf n2 (logior 128 (logand 63 (ash n -6))))
+               (setf n3 (logior 128 (logand 63 (ash n -12))))
+               (setf n4 (logior 240 (logand 7 (ash n -18))))
+               (setf l (list n4 n3 n2 n1)))
+           (t
+               (|error| "Too large character code"))
+       )
+       (make-array k :element-type 'character
+                  :initial-contents (mapcar #'code-char l))))
+
 (defun CGREATERP (s1 s2) (string> (string s1) (string s2)))
 
 (define-function 'STRGREATERP #'CGREATERP)
-- 
                              Waldek Hebisch
[email protected] 

-- 
You received this message because you are subscribed to the Google Groups 
"FriCAS - computer algebra system" group.
To post to this group, send email to [email protected].
To unsubscribe from this group, send email to 
[email protected].
For more options, visit this group at 
http://groups.google.com/group/fricas-devel?hl=en.

Reply via email to