Author: leo
Date: Wed Mar 22 02:53:05 2006
New Revision: 11978

Modified:
   trunk/src/io/io_utf8.c
   trunk/t/pmc/io.t

Log:
strings - utf8 input filter

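* implement PIO_utf8_read filter - please note that it isn't yet safe
  to read arbitrary parts of a file: the filter does not buffer leftover
  bytes between reads (see the TODO in the code), so a read that stops
  part-way through a multi-byte UTF-8 sequence is presumably not handled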
* test


Modified: trunk/src/io/io_utf8.c
==============================================================================
--- trunk/src/io/io_utf8.c      (original)
+++ trunk/src/io/io_utf8.c      Wed Mar 22 02:53:05 2006
@@ -43,6 +43,23 @@
     return &pio_utf8_layer;
 }
 
+static size_t
+PIO_utf8_read(theINTERP, ParrotIOLayer *layer, ParrotIO *io,
+              STRING **buf)
+{
+    size_t len;
+    STRING *s;
+
+    len = PIO_read_down(interpreter, layer->down, io, buf);
+    s = *buf;
+    s->charset  = Parrot_unicode_charset_ptr;
+    s->encoding = Parrot_utf8_encoding_ptr;
+    /* count chars, verify utf8; the codepoint count becomes the string's strlen */
+    s->strlen = Parrot_utf8_encoding_ptr->codepoints(interpreter, s);
+    /* TODO buffer additional chars for next read (presumably a read can end mid-sequence) */
+    return len;
+}
+
 void *Parrot_utf8_encode(void *ptr, UINTVAL c);
 static size_t
 PIO_utf8_write(theINTERP, ParrotIOLayer *l, ParrotIO *io, STRING *s)
@@ -71,7 +88,7 @@
     PIO_null_close,
     PIO_utf8_write,
     PIO_null_write_async,
-    PIO_null_read,
+    PIO_utf8_read,
     PIO_null_read_async,
     PIO_null_flush,
     PIO_null_peek,

Modified: trunk/t/pmc/io.t
==============================================================================
--- trunk/t/pmc/io.t    (original)
+++ trunk/t/pmc/io.t    Wed Mar 22 02:53:05 2006
@@ -6,7 +6,7 @@
 use warnings;
 use lib qw( . lib ../lib ../../lib );
 use Test::More;
-use Parrot::Test tests => 32;
+use Parrot::Test tests => 33;
 
 =head1 NAME
 
@@ -617,3 +617,40 @@
 CODE
 /some crazy exception/
 OUT
+
+open FOO, ">temp.file";  # write the UTF-8 fixture: o-umlaut (U+00F6) is stored as the two bytes C3 B6
+print FOO "T\xc3\xb6tsch\n";
+close FOO;
+
+pir_output_is(<<'CODE', <<"OUTPUT", "utf8 read layer");
+.sub main :main
+    .local pmc pio
+    .local int len
+    .include "stat.pasm"
+    .local string f
+    f = 'temp.file'
+    len = stat f, .STAT_FILESIZE
+    pio = open f, "<"
+    push pio, "utf8"
+    $S0 = read pio, len
+    close pio
+    $I1 = charset $S0
+    $S2 = charsetname $I1
+    print $S2
+    print "\n"
+    $I1 = encoding $S0
+    $S2 = encodingname $I1
+    print $S2
+    print "\n"
+    $I1 = find_charset 'iso-8859-1'
+    trans_charset $S1, $S0, $I1
+    print $S1
+.end
+CODE
+unicode
+utf8
+T\xf6tsch
+OUTPUT
+
+
+unlink("temp.file");

Reply via email to