This discussion has inspired me to think a little more about what should be done in this specfic context, and I think the attached patch should do it. It is a more drastic patch than the previous one, and I don't have any widestring test material, but it passes some simple tests. I have copied some code from the utf8bidi unit - the functionality I needed was not exported from the unit (but should be?)
Colin
diff -uNr fpc/rtl/objpas/classes/classes.inc 
fpc.w/rtl/objpas/classes/classes.inc
--- fpc/rtl/objpas/classes/classes.inc  2005-03-10 20:14:37.000000000 +0000
+++ fpc.w/rtl/objpas/classes/classes.inc        2005-03-10 21:56:50.500965296 
+0000
@@ -752,22 +752,46 @@
 { Object conversion routines }
 
 type
-  CharToOrdFuncty = Function(var charpo: Pointer): word;
+  CharToOrdFuncty = Function(var charpo: Pointer): Cardinal;
 
-function CharToOrd(var P: Pointer): Word;
+function CharToOrd(var P: Pointer): Cardinal;
 begin
   result:= ord(pchar(P)^);
   inc(pchar(P));
 end;
 
 {$ifdef HASWIDESTRING}
-function WideCharToOrd(var P: Pointer): Word;
+function WideCharToOrd(var P: Pointer): Cardinal;
 begin
   result:= ord(pwidechar(P)^);
   inc(pwidechar(P));
 end;
 {$endif HASWIDESTRING}
 
+function Utf8ToOrd(var P:Pointer): Cardinal;
+begin
+  // Should also check for illegal utf8 combinations
+  Result := Ord(PChar(P)^);
+  Inc(P);
+  if (Result and $80) <> 0 then
+    if (Ord(Result) and %11100000) = %11000000 then begin
+      Result := ((Result and %00011111) shl 6)
+                or (ord(PChar(P)^) and %00111111);
+      Inc(P);
+    end else if (Ord(Result) and %11110000) = %11100000 then begin
+      Result := ((Result and %00011111) shl 12)
+                or ((ord(PChar(P)^) and %00111111) shl 6)
+                or (ord((PChar(P)+1)^) and %00111111);
+      Inc(P,2);
+    end else begin
+      Result := ((ord(Result) and %00011111) shl 18)
+                or ((ord(PChar(P)^) and %00111111) shl 12)
+                or ((ord((PChar(P)+1)^) and %00111111) shl 6)
+                or (ord((PChar(P)+2)^) and %00111111);
+      Inc(P,3);
+    end;
+end;
+
 procedure ObjectBinaryToText(Input, Output: TStream);
 
   procedure OutStr(s: String);
@@ -781,33 +805,29 @@
     OutStr(s + #10);
   end;
 
-  procedure Outchars(P : Pointer; Len : Integer; CharToOrdFunc: 
CharToOrdFuncty);
+  procedure Outchars(P, LastP : Pointer; CharToOrdFunc: CharToOrdFuncty);
 
   var
     res, NewStr: String;
-    i : Integer;
-    w: Word;
+    w: Cardinal;
     InString, NewInString: Boolean;
   begin
     res := '';
     InString := False;
-    for i := 0 to Len-1 do begin
+    while P < LastP do begin
       NewInString := InString;
       w := CharToOrdfunc(P);
-      case w of
-        0..31,127..$ffff: begin //ascii control chars, non ascii
-            if InString then
-              NewInString := False;
-            NewStr := '#' + IntToStr(w);
-          end;
-        ord(''''):              //quote char
-            if InString then NewStr := ''''''
-            else NewStr := '''''''';
-        else begin              //printable ascii
-          if not InString then
-            NewInString := True;
-          NewStr := char(w);
-        end;
+      if w = ord('''') then  //quote char
+        if InString then NewStr := ''''''
+        else NewStr := ''''''''
+      else if (Ord(w) >= 32) and (Ord(w) < 127) then begin //printable ascii
+        if not InString then
+          NewInString := True;
+        NewStr := char(w);
+      end else begin //ascii control chars, non ascii
+        if InString then
+          NewInString := False;
+        NewStr := '#' + IntToStr(w);
       end;
       if NewInString <> InString then begin
         NewStr := '''' + NewStr;
@@ -822,18 +842,22 @@
   procedure OutString(s: String);
 
   begin
-    OutChars(Pointer(S),Length(S),@CharToOrd);
+    OutChars(Pointer(S),PChar(S)+Length(S),@CharToOrd);
   end;
 
   procedure OutWString(W: WideString);
 
   begin
 {$ifdef HASWIDESTRING}
-    OutChars(Pointer(W),Length(W),@WideCharToOrd);
+    OutChars(Pointer(W),pwidechar(W)+Length(W),@WideCharToOrd);
 {$endif HASWIDESTRING}
   end;
 
-
+  procedure OutUtf8Str(s: String);
+  begin
+    OutChars(Pointer(S),PChar(S)+Length(S),@Utf8ToOrd);
+  end;
+  
   function ReadInt(ValueType: TValueType): LongInt;
   begin
     case ValueType of
@@ -877,41 +901,6 @@
 {$endif HASWIDESTRING}
   end;
 
-  function ReadUTF8Str: String;
-  var
-    len, f, t: Integer;
-  begin
-    len := Input.ReadDWord;
-    SetLength(Result, len);
-    if len > 0 then begin
-      Input.Read(Result[1], len);
-      { For now simply take bottom 8 bits of Unicode character }
-      t := 1;
-      f := 1;
-      while f <= len do begin
-        if (Ord(Result[f]) and $80) <> 0 then begin
-          if (Ord(Result[f]) and %11100000) = %11000000 then
-            Inc(f)
-          else if (Ord(Result[f]) and %11110000) = %11100000 then
-            Inc(f,2)
-          else if (Ord(Result[f]) and %11111000) = %11110000 then
-            Inc(f,3)
-          else if (Ord(Result[f]) and %11111100) = %11111000 then
-            Inc(f,4)
-          else if (Ord(Result[f]) and %11111110) = %11111100 then
-            Inc(f,5)
-          else
-            WriteLn('Bad UTF8 Sequence');
-          Result[t] := Char((Ord(Result[f]) and %111111) or ((Ord(Result[f-1]) 
and %11) shl 6));
-        end else
-          Result[t] := Result[f];
-        Inc(f);
-        Inc(t);
-      end;
-      SetLength(Result, t-1);
-    end;
-  end;
-  
   procedure ReadPropList(indent: String);
 
     procedure ProcessValue(ValueType: TValueType; Indent: String);
@@ -1024,7 +1013,7 @@
         vaCurrency: begin OutLn('!!Currency!!'); exit end;
         vaDate: begin OutLn('!!Date!!'); exit end;}
         vaUTF8String: begin
-            OutString(ReadUTF8Str);
+            OutUtf8Str(ReadLStr);
             OutLn('');
           end;
         else
_______________________________________________
fpc-devel maillist  -  fpc-devel@lists.freepascal.org
http://lists.freepascal.org/mailman/listinfo/fpc-devel

Reply via email to