cvsuser     05/03/02 09:00:52

  Modified:    charset  unicode.c
               encodings utf8.c
               io       io_utf8.c
               src      string.c
  Log:
  Strings. Finally. 15 - utf8 transcoding
  
  * implement utf8.copy_to_encoding
  * use it in utf8 IO filter
  * remove constness warning
  
  Revision  Changes    Path
  1.4       +10 -7     parrot/charset/unicode.c
  
  Index: unicode.c
  ===================================================================
  RCS file: /cvs/public/parrot/charset/unicode.c,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- unicode.c 2 Mar 2005 15:32:58 -0000       1.3
  +++ unicode.c 2 Mar 2005 17:00:49 -0000       1.4
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2005 The Perl Foundation.  All Rights Reserved.
  -$Id: unicode.c,v 1.3 2005/03/02 15:32:58 leo Exp $
  +$Id: unicode.c,v 1.4 2005/03/02 17:00:49 leo Exp $
   
   =head1 NAME
   
  @@ -165,7 +165,6 @@
       return 0;
   }
   
  -/* Binary's always valid */
   static UINTVAL
   validate(Interp *interpreter, STRING *source_string)
   {
  @@ -287,12 +286,16 @@
   static STRING *
   string_from_codepoint(Interp *interpreter, UINTVAL codepoint)
   {
  -    STRING *return_string;
  +    STRING *dest;
  +    String_iter iter;
   
  -    return_string = string_make(interpreter, "", 1, "unicode", 0);
  -    return_string->strlen = 1;
  -    ENCODING_SET_CODEPOINT(interpreter, return_string, 0, codepoint);
  -    return return_string;
  +    dest = string_make(interpreter, "", 1, "unicode", 0);
  +    dest->strlen = 1;
  +    ENCODING_ITER_INIT(interpreter, dest, &iter);
  +    iter.set_and_advance(interpreter, &iter, codepoint);
  +    dest->bufused = iter.bytepos;
  +
  +    return dest;
   }
   
   static size_t
  
  
  
  1.24      +46 -8     parrot/encodings/utf8.c
  
  Index: utf8.c
  ===================================================================
  RCS file: /cvs/public/parrot/encodings/utf8.c,v
  retrieving revision 1.23
  retrieving revision 1.24
  diff -u -r1.23 -r1.24
  --- utf8.c    2 Mar 2005 15:32:59 -0000       1.23
  +++ utf8.c    2 Mar 2005 17:00:50 -0000       1.24
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -$Id: utf8.c,v 1.23 2005/03/02 15:32:59 leo Exp $
  +$Id: utf8.c,v 1.24 2005/03/02 17:00:50 leo Exp $
   
   =head1 NAME
   
  @@ -306,23 +306,55 @@
   
   
   /* This function needs to go through and get all the code points one
  -   by one and turn them into a byte */
  +   by one and turn them into a utf8 sequence */
   static void
   to_encoding(Interp *interpreter, STRING *src)
   {
  +    if (src->encoding == Parrot_utf8_encoding_ptr)
  +        return;
       UNIMPL;
   }
   
   static STRING *
   copy_to_encoding(Interp *interpreter, STRING *src)
   {
  -    STRING *return_string = NULL;
  -
  -    UNIMPL;
  -    return return_string;
  +    STRING *dest;
  +    String_iter src_iter, dest_iter;
  +    UINTVAL offs, c;
  +
  +    if (src->encoding == Parrot_utf8_encoding_ptr)
  +        return string_copy(interpreter, src);
  +
  +    /*
  +     * TODO adapt string creation functions
  +     */
  +    dest = string_make_empty(interpreter, enum_stringrep_one, src->strlen);
  +    dest->charset  = Parrot_unicode_charset_ptr;
  +    dest->encoding = Parrot_utf8_encoding_ptr;
  +    dest->strlen   = src->strlen;
  +
  +    if (!src->strlen)
  +        return dest;
  +
  +    ENCODING_ITER_INIT(interpreter, src, &src_iter);
  +    ENCODING_ITER_INIT(interpreter, dest, &dest_iter);
  +
  +    for (offs = 0; offs < src->strlen; ++offs) {
  +        c = src_iter.get_and_advance(interpreter, &src_iter);
  +        if (dest_iter.bytepos >= PObj_buflen(dest) - 4) {
  +            UINTVAL need = (src->strlen - offs) * 1.5;
  +            if (need < 16)
  +                need = 16;
  +            Parrot_reallocate_string(interpreter, dest,
  +                    PObj_buflen(dest) + need);
  +        }
  +        dest_iter.set_and_advance(interpreter, &dest_iter, c);
  +    }
  +    assert(dest->strlen  == dest_iter.charpos);
  +    dest->bufused = dest_iter.bytepos;
  +    return dest;
   }
   
  -/* codepoints are bytes, so delegate */
   static UINTVAL
   get_codepoint(Interp *interpreter, const STRING *src, UINTVAL offset)
   {
  @@ -337,9 +369,15 @@
        UINTVAL offset, UINTVAL codepoint)
   {
       const void *start;
  +    void *p;
  +    union {
  +        const void * __c_ptr;
  +        void * __ptr;
  +    } __ptr_u;
   
       start = utf8_skip_forward(src->strstart, offset);
  -    utf8_encode(start, codepoint);
  +    p = const_cast(start);
  +    utf8_encode(p, codepoint);
   }
   
   static UINTVAL
  
  
  
  1.4       +8 -14     parrot/io/io_utf8.c
  
  Index: io_utf8.c
  ===================================================================
  RCS file: /cvs/public/parrot/io/io_utf8.c,v
  retrieving revision 1.3
  retrieving revision 1.4
  diff -u -r1.3 -r1.4
  --- io_utf8.c 14 Feb 2005 11:34:22 -0000      1.3
  +++ io_utf8.c 2 Mar 2005 17:00:51 -0000       1.4
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -$Id: io_utf8.c,v 1.3 2005/02/14 11:34:22 leo Exp $
  +$Id: io_utf8.c,v 1.4 2005/03/02 17:00:51 leo Exp $
   
   =head1 NAME
   
  @@ -47,19 +47,13 @@
   static size_t
   PIO_utf8_write(theINTERP, ParrotIOLayer *l, ParrotIO *io, STRING *s)
   {
  -    STRING n;
  -    size_t idx, length = string_length(interpreter, s);
  -    char *buffer = malloc(4*length);
  -    char *cursor = buffer;
  -
  -    for( idx = 0; idx < length; ++idx )
  -    {
  -        cursor = Parrot_utf8_encode(cursor, string_index(interpreter, s, idx));
  -    }
  -
  -    n.strstart = buffer;
  -    n.bufused = cursor - buffer;
  -    return PIO_write_down(interpreter, l->down, io, &n);
  +    STRING *dest;
  +
  +    if (s->encoding == Parrot_utf8_encoding_ptr)
  +        return PIO_write_down(interpreter, l->down, io, s);
  +
  +    dest = Parrot_utf8_encoding_ptr->copy_to_encoding(interpreter, s);
  +    return PIO_write_down(interpreter, l->down, io, dest);
   }
   
   static const ParrotIOLayerAPI pio_utf8_layer_api = {
  
  
  
  1.244     +4 -1      parrot/src/string.c
  
  Index: string.c
  ===================================================================
  RCS file: /cvs/public/parrot/src/string.c,v
  retrieving revision 1.243
  retrieving revision 1.244
  diff -u -r1.243 -r1.244
  --- string.c  2 Mar 2005 15:32:59 -0000       1.243
  +++ string.c  2 Mar 2005 17:00:51 -0000       1.244
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -$Id: string.c,v 1.243 2005/03/02 15:32:59 leo Exp $
  +$Id: string.c,v 1.244 2005/03/02 17:00:51 leo Exp $
   
   =head1 NAME
   
  @@ -384,6 +384,9 @@
   
       s = new_string_header(interpreter, 0);
   
  +    /*
  +     * TODO adapt string creation functions
  +     */
       if (representation == enum_stringrep_one) {
           s->charset = PARROT_DEFAULT_CHARSET;
           s->encoding = CHARSET_GET_PREFERRED_ENCODING(interpreter, s);;
  
  
  

Reply via email to