Author: simon
Date: Mon Jan 26 13:12:52 2009
New Revision: 36039

Modified:
   branches/strings/pseudocode/ParrotEncoding/ParrotNative.pm
   branches/strings/pseudocode/ParrotString.pm
   branches/strings/pseudocode/t/recode.t

Log:
Implement char-wise equality testing, and rework the way native encoding 
iterates over stuff.


Modified: branches/strings/pseudocode/ParrotEncoding/ParrotNative.pm
==============================================================================
--- branches/strings/pseudocode/ParrotEncoding/ParrotNative.pm  (original)
+++ branches/strings/pseudocode/ParrotEncoding/ParrotNative.pm  Mon Jan 26 
13:12:52 2009
@@ -1,7 +1,21 @@
-class ParrotEncoding::ParrotNative is ParrotEncoding::Base::Fixed {
+class ParrotEncoding::ParrotNative {
     our $.width = 1;
 
     method setup($str) { $str.normalization = ParrotNormalization::NFG.new(); }
+    method string_grapheme_iterate($str, $callback, $parameter) {
+        for (0..$str.bufused-1) {
+            $callback(self.grapheme_at_index($str,$_), $parameter); 
+        }
+    }
+    method string_length($str) { 
+        # We're not really all that fixed - borrow this from the
+        # variable one. You could optimize this by checking if there's
+        # any entries in the grapheme table.
+        my $data = 0; 
+        my $callback = sub ($char, $data is rw) { $data++ };
+        $str.encoding.string_char_iterate($str, $callback, $data);
+        return $data;
+    }
     method append_grapheme ($str, $g) {
         if (@($g) > 1) {
             my $item;
@@ -16,9 +30,9 @@
 
     method string_char_iterate ($str, $callback, $parameter) {
         for (0..$str.bufused-1) { 
-            my $grapheme = grapheme_at_index($str, $_);
+            my $grapheme = self.grapheme_at_index($str, $_);
             for (@( $grapheme )) {
-                $callback($str.buffer.[$_], $parameter); 
+                $callback($_, $parameter); 
             }
         }
     }
@@ -27,6 +41,7 @@
         # We need to look inside each grapheme, since NFG stores individual
         # graphemes and graphemes are composed of multiple characters - 
         # this could be improved with caching later but we will 
+        # do it the slow stupid way for now 
         ...
     }
 
@@ -34,6 +49,7 @@
         if (!$str.normalization) { 
             $str.charset.normalize($str, ParrotNormalization::NFG);
         }
+        die $index~" Read off end of buffer" if $index > $str.buffer - 1;
         my $c = $str.buffer[$index];
         if $c >= 0 { return [ $c ]; }
         return $str.normalization.grapheme_table.[-$c - 1];

Modified: branches/strings/pseudocode/ParrotString.pm
==============================================================================
--- branches/strings/pseudocode/ParrotString.pm (original)
+++ branches/strings/pseudocode/ParrotString.pm Mon Jan 26 13:12:52 2009
@@ -116,13 +116,16 @@
     if ($one.strlen != $two.strlen) { return 0; }
     for (0 .. $one.strlen-1) {
         if ($one.buffer.[$_] != $two.buffer.[$_]) { 
-            say "Oops, byte "~$_~" differed";
             return 0 
         }
     }
     return 1;
 }
 sub Parrot_string_character_equal($one, $two) {
-    say "Not implemented yet";
-    return 0;
+    my $l = Parrot_string_length($one);
+    return 0 if $l != Parrot_string_length($two);
+    for (0.. $l-1) {
+        return 0 if Parrot_string_index($one, $_) != Parrot_string_index($two, 
$_);
+    }
+    return 1;
 }

Modified: branches/strings/pseudocode/t/recode.t
==============================================================================
--- branches/strings/pseudocode/t/recode.t      (original)
+++ branches/strings/pseudocode/t/recode.t      Mon Jan 26 13:12:52 2009
@@ -1,12 +1,13 @@
 use Test;
 use ParrotString;
-plan 4;
+plan 5;
 
 # The standard NFG example...
 my $str = Parrot_string_new_init("ABC \xd0\xb8\xcc\x8f", 8, 
ParrotCharset::Unicode, ParrotEncoding::UTF8);
 my $str2 = Parrot_string_new_init("", 0, ParrotCharset::Unicode, 
ParrotEncoding::ParrotNative);
 Parrot_string_grapheme_copy($str, $str2);
 is(Parrot_string_grapheme_length($str2), 5, "Four UTF8 bytes = one grapheme");
+is(Parrot_string_length($str2), 6, "One grapheme is actually two chars");
 my $str3 = Parrot_string_new_init("", 0, ParrotCharset::Unicode, 
ParrotEncoding::UTF8);
 
 Parrot_string_grapheme_copy($str2, $str3);

Reply via email to