Author: leo
Date: Wed Mar 22 02:53:05 2006
New Revision: 11978
Modified:
trunk/src/io/io_utf8.c
trunk/t/pmc/io.t
Log:
strings - utf8 input filter
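* implement PIO_utf8_read filter - please note that it isn't yet safe
to read arbitrary parts of a file: bytes of a multi-byte UTF-8 sequence
cut off at the end of a read are not buffered for the next read (TODO)
* add a test for the utf8 read layer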
Modified: trunk/src/io/io_utf8.c
==============================================================================
--- trunk/src/io/io_utf8.c (original)
+++ trunk/src/io/io_utf8.c Wed Mar 22 02:53:05 2006
@@ -43,6 +43,23 @@
return &pio_utf8_layer;
}
+static size_t
+PIO_utf8_read(theINTERP, ParrotIOLayer *layer, ParrotIO *io,
+ STRING **buf)
+{
+ size_t len;
+ STRING *s;
+
+ len = PIO_read_down(interpreter, layer->down, io, buf);
+ s = *buf;
+ s->charset = Parrot_unicode_charset_ptr;
+ s->encoding = Parrot_utf8_encoding_ptr;
+ /* strlen is in characters, not bytes: count codepoints, which also verifies the utf8 */
+ s->strlen = Parrot_utf8_encoding_ptr->codepoints(interpreter, s);
+ /* TODO buffer additional chars (an incomplete trailing utf8 sequence?) for next read */
+ return len; /* result of the lower layer's read, passed through unchanged */
+}
+
void *Parrot_utf8_encode(void *ptr, UINTVAL c);
static size_t
PIO_utf8_write(theINTERP, ParrotIOLayer *l, ParrotIO *io, STRING *s)
@@ -71,7 +88,7 @@
PIO_null_close,
PIO_utf8_write,
PIO_null_write_async,
- PIO_null_read,
+ PIO_utf8_read,
PIO_null_read_async,
PIO_null_flush,
PIO_null_peek,
Modified: trunk/t/pmc/io.t
==============================================================================
--- trunk/t/pmc/io.t (original)
+++ trunk/t/pmc/io.t Wed Mar 22 02:53:05 2006
@@ -6,7 +6,7 @@
use warnings;
use lib qw( . lib ../lib ../../lib );
use Test::More;
-use Parrot::Test tests => 32;
+use Parrot::Test tests => 33;
=head1 NAME
@@ -617,3 +617,40 @@
CODE
/some crazy exception/
OUT
+
+open FOO, ">temp.file"; # write utf8 fixture: \xc3\xb6 is the 2-byte UTF-8 form of U+00F6, expected back as \xf6 in iso-8859-1
+print FOO "T\xc3\xb6tsch\n";
+close FOO;
+
+pir_output_is(<<'CODE', <<"OUTPUT", "utf8 read layer");
+.sub main :main
+ .local pmc pio
+ .local int len
+ .include "stat.pasm"
+ .local string f
+ f = 'temp.file'
+ len = stat f, .STAT_FILESIZE
+ pio = open f, "<"
+ push pio, "utf8"
+ $S0 = read pio, len
+ close pio
+ $I1 = charset $S0
+ $S2 = charsetname $I1
+ print $S2
+ print "\n"
+ $I1 = encoding $S0
+ $S2 = encodingname $I1
+ print $S2
+ print "\n"
+ $I1 = find_charset 'iso-8859-1'
+ trans_charset $S1, $S0, $I1
+ print $S1
+.end
+CODE
+unicode
+utf8
+T\xf6tsch
+OUTPUT
+
+
+unlink("temp.file");