cvsuser     05/03/01 07:41:31

  Modified:    charset  ascii.c ascii.h iso-8859-1.c
               src      charset.c
               t/op     string_cs.t
  Log:
  Strings. Finally. 9 - more charset converters
  
  Revision  Changes    Path
  1.14      +60 -15    parrot/charset/ascii.c
  
  Index: ascii.c
  ===================================================================
  RCS file: /cvs/public/parrot/charset/ascii.c,v
  retrieving revision 1.13
  retrieving revision 1.14
  diff -u -r1.13 -r1.14
  --- ascii.c   1 Mar 2005 14:19:45 -0000       1.13
  +++ ascii.c   1 Mar 2005 15:41:25 -0000       1.14
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: ascii.c,v 1.13 2005/03/01 14:19:45 leo Exp $
  +$Id: ascii.c,v 1.14 2005/03/01 15:41:25 leo Exp $
   
   =head1 NAME
   
  @@ -95,33 +95,43 @@
               offset, count, dest_string);
   }
   
  +
   static STRING *
  -to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)
  +from_charset(Interp *interpreter, STRING *source_string, STRING *dest)
   {
  -    internal_exception(UNIMPLEMENTED, "to_charset for ascii not implemented");
  +    internal_exception(UNIMPLEMENTED, "Can't do this yet");
       return NULL;
   }
   
  -
   static STRING *
  -to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
  +from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
   {
  -    internal_exception(UNIMPLEMENTED, "to_unicode for ascii not implemented");
  +    internal_exception(UNIMPLEMENTED, "Can't do this yet");
       return NULL;
   }
   
  -static STRING *
  -from_charset(Interp *interpreter, STRING *source_string, STRING *dest)
  +STRING *
  +ascii_to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
   {
  -    internal_exception(UNIMPLEMENTED, "Can't do this yet");
  +    internal_exception(UNIMPLEMENTED,
  +            "to_unicode for iso-8859-1 not implemented");
       return NULL;
   }
   
  -static STRING *
  -from_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
  +STRING *
  +ascii_to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)
   {
  -    internal_exception(UNIMPLEMENTED, "Can't do this yet");
  -    return NULL;
  +    charset_converter_t conversion_func;
  +
  +    if ((conversion_func = Parrot_find_charset_converter(interpreter,
  +                    src->charset, new_charset))) {
  +         return conversion_func(interpreter, src, dest);
  +    }
  +    else {
  +        STRING *res = ascii_to_unicode(interpreter, src, dest);
  +        return new_charset->from_charset(interpreter, res, dest);
  +
  +    }
   }
   
   /* A noop. can't compose ascii */
  @@ -506,8 +516,8 @@
         ascii_get_graphemes,
         ascii_get_graphemes_inplace,
         set_graphemes,
  -      to_charset,
  -      to_unicode,
  +      ascii_to_charset,
  +      ascii_to_unicode,
         from_charset,
         from_unicode,
         compose,
  @@ -555,6 +565,41 @@
     return return_set;
   }
   
  +STRING *
  +charset_cvt_ascii_to_binary(Interp *interpreter, STRING *src, STRING *dest)
  +{
  +    UINTVAL offs, c;
  +    if (dest) {
  +        Parrot_reallocate_string(interpreter, dest, src->strlen);
  +        dest->bufused = src->bufused;
  +        dest->strlen  = src->strlen;
  +        for (offs = 0; offs < src->strlen; ++offs) {
  +            c = ENCODING_GET_BYTE(interpreter, src, offs);
  +            ENCODING_SET_BYTE(interpreter, dest, offs, c);
  +        }
  +        return dest;
  +    }
  +    src->charset = Parrot_binary_charset_ptr;
  +    return src;
  +}
  +
  +STRING *
  +charset_cvt_ascii_to_iso_8859_1(Interp *interpreter, STRING *src, STRING *dest)
  +{
  +    UINTVAL offs, c;
  +    if (dest) {
  +        Parrot_reallocate_string(interpreter, dest, src->strlen);
  +        dest->bufused = src->bufused;
  +        dest->strlen  = src->strlen;
  +        for (offs = 0; offs < src->strlen; ++offs) {
  +            c = ENCODING_GET_BYTE(interpreter, src, offs);
  +            ENCODING_SET_BYTE(interpreter, dest, offs, c);
  +        }
  +        return dest;
  +    }
  +    src->charset = Parrot_iso_8859_1_charset_ptr;
  +    return src;
  +}
   /*
    * Local variables:
    * c-indentation-style: bsd
  
  
  
  1.11      +5 -1      parrot/charset/ascii.h
  
  Index: ascii.h
  ===================================================================
  RCS file: /cvs/public/parrot/charset/ascii.h,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- ascii.h   1 Mar 2005 14:19:45 -0000       1.10
  +++ ascii.h   1 Mar 2005 15:41:25 -0000       1.11
  @@ -1,7 +1,7 @@
   /* ascii.h
    *  Copyright: 2004 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: ascii.h,v 1.10 2005/03/01 14:19:45 leo Exp $
  + *     $Id: ascii.h,v 1.11 2005/03/01 15:41:25 leo Exp $
    *  Overview:
    *     This is the header for the ascii charset functions
    *  Data Structure and Algorithms:
  @@ -39,6 +39,8 @@
   INTVAL ascii_cs_rindex(Interp *, const STRING *source_string,
           const STRING *search_string, UINTVAL offset);
   size_t ascii_compute_hash(Interp *, STRING *source_string);
  +STRING * ascii_to_unicode(Interp *, STRING *source_string, STRING *dest);
  +STRING * ascii_to_charset(Interp *, STRING *src, CHARSET *new_cs, STRING *dest);
   
   static void compose(Interp *, STRING *source_string);
   static void decompose(Interp *, STRING *source_string);
  @@ -63,6 +65,8 @@
   static INTVAL find_not_punctuation(Interp *, STRING *source_string, UINTVAL offset);
   CHARSET *Parrot_charset_ascii_init(Interp *);
   
  +STRING *charset_cvt_ascii_to_binary(Interp *, STRING *src, STRING *dest);
  +STRING *charset_cvt_ascii_to_iso_8859_1(Interp *, STRING *src, STRING *dest);
   
   #endif /* PARROT_CHARSET_ASCII_H_GUARD */
   /*
  
  
  
  1.11      +3 -27     parrot/charset/iso-8859-1.c
  
  Index: iso-8859-1.c
  ===================================================================
  RCS file: /cvs/public/parrot/charset/iso-8859-1.c,v
  retrieving revision 1.10
  retrieving revision 1.11
  diff -u -r1.10 -r1.11
  --- iso-8859-1.c      1 Mar 2005 14:19:45 -0000       1.10
  +++ iso-8859-1.c      1 Mar 2005 15:41:25 -0000       1.11
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: iso-8859-1.c,v 1.10 2005/03/01 14:19:45 leo Exp $
  +$Id: iso-8859-1.c,v 1.11 2005/03/01 15:41:25 leo Exp $
   
   =head1 NAME
   
  @@ -81,30 +81,6 @@
   }
   
   
  -static STRING *
  -to_unicode(Interp *interpreter, STRING *source_string, STRING *dest)
  -{
  -    internal_exception(UNIMPLEMENTED,
  -            "to_unicode for iso-8859-1 not implemented");
  -    return NULL;
  -}
  -
  -static STRING *
  -to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)
  -{
  -    charset_converter_t conversion_func;
  -
  -    if ((conversion_func = Parrot_find_charset_converter(interpreter,
  -                    src->charset, new_charset))) {
  -         return conversion_func(interpreter, src, dest);
  -    }
  -    else {
  -        STRING *res = to_unicode(interpreter, src, dest);
  -        return new_charset->from_charset(interpreter, res, dest);
  -
  -    }
  -}
  -
   /* A noop. can't compose iso-8859-1 */
   static void
   compose(Interp *interpreter, STRING *source_string)
  @@ -369,8 +345,8 @@
           ascii_get_graphemes,
           ascii_get_graphemes_inplace,
           set_graphemes,
  -        to_charset,
  -        to_unicode,
  +        ascii_to_charset,
  +        ascii_to_unicode,
           from_charset,
           from_unicode,
           compose,
  
  
  
  1.10      +31 -5     parrot/src/charset.c
  
  Index: charset.c
  ===================================================================
  RCS file: /cvs/public/parrot/src/charset.c,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- charset.c 1 Mar 2005 14:19:48 -0000       1.9
  +++ charset.c 1 Mar 2005 15:41:26 -0000       1.10
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: charset.c,v 1.9 2005/03/01 14:19:48 leo Exp $
  +$Id: charset.c,v 1.10 2005/03/01 15:41:26 leo Exp $
   
   =head1 NAME
   
  @@ -15,6 +15,7 @@
   #define PARROT_NO_EXTERN_CHARSET_PTRS
   #include "parrot/parrot.h"
   #include "../charset/iso-8859-1.h"
  +#include "../charset/ascii.h"
   
   CHARSET *Parrot_iso_8859_1_charset_ptr;
   CHARSET *Parrot_binary_charset_ptr;
  @@ -59,6 +60,8 @@
   
       n = all_charsets->n_charsets;
       for (i = 0; i < n; ++i) {
  +        if (all_charsets->set[i].n_converters)
  +            mem_sys_free(all_charsets->set[i].to_converters);
           mem_sys_free(all_charsets->set[i].charset);
       }
       mem_sys_free(all_charsets->set);
  @@ -174,6 +177,24 @@
       return 1;
   }
   
  +static void
  +register_static_converters(Interp *interpreter)
  +{
  +    Parrot_register_charset_converter(interpreter,
  +            Parrot_iso_8859_1_charset_ptr, Parrot_ascii_charset_ptr,
  +            charset_cvt_iso_8859_1_to_ascii);
  +    Parrot_register_charset_converter(interpreter,
  +            Parrot_iso_8859_1_charset_ptr, Parrot_binary_charset_ptr,
  +            charset_cvt_ascii_to_binary);
  +
  +    Parrot_register_charset_converter(interpreter,
  +            Parrot_ascii_charset_ptr, Parrot_binary_charset_ptr,
  +            charset_cvt_ascii_to_binary);
  +    Parrot_register_charset_converter(interpreter,
  +            Parrot_ascii_charset_ptr, Parrot_iso_8859_1_charset_ptr,
  +            charset_cvt_ascii_to_iso_8859_1);
  +}
  +
   INTVAL
   Parrot_register_charset(Interp *interpreter, const char *charsetname,
           CHARSET *charset)
  @@ -199,11 +220,16 @@
           return register_charset(interpreter, charsetname, charset);
       }
       if (!strcmp("ascii", charsetname)) {
  +        INTVAL result;
  +
           Parrot_ascii_charset_ptr = charset;
  -        Parrot_register_charset_converter(interpreter,
  -                Parrot_iso_8859_1_charset_ptr, charset,
  -                charset_cvt_iso_8859_1_to_ascii);
  -        return register_charset(interpreter, charsetname, charset);
  +        result = register_charset(interpreter, charsetname, charset);
  +        /*
  +         * ascii is currently the last charset - so we can
  +         * now install charset converters
  +         */
  +        register_static_converters(interpreter);
  +        return result;
       }
       return 0;
   }
  
  
  
  1.7       +98 -2     parrot/t/op/string_cs.t
  
  Index: string_cs.t
  ===================================================================
  RCS file: /cvs/public/parrot/t/op/string_cs.t,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- string_cs.t       1 Mar 2005 14:19:49 -0000       1.6
  +++ string_cs.t       1 Mar 2005 15:41:31 -0000       1.7
  @@ -1,6 +1,6 @@
   #! perl -w
   # Copyright: 2001-2004 The Perl Foundation.  All Rights Reserved.
  -# $Id: string_cs.t,v 1.6 2005/03/01 14:19:49 leo Exp $
  +# $Id: string_cs.t,v 1.7 2005/03/01 15:41:31 leo Exp $
   
   =head1 NAME
   
  @@ -16,7 +16,7 @@
   
   =cut
   
  -use Parrot::Test tests => 20;
  +use Parrot::Test tests => 26;
   use Test::More;
   
   output_is( <<'CODE', <<OUTPUT, "basic syntax" );
  @@ -305,3 +305,99 @@
   ascii
   OUTPUT
   
  +output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i iso-8859-1 to binary");
  +    set S0, "abc"
  +    find_charset I0, "binary"
  +    trans_charset S1, S0, I0
  +    print S1
  +    print "\n"
  +    charset I0, S1
  +    charsetname S2, I0
  +    print S2
  +    print "\n"
  +    end
  +CODE
  +abc
  +binary
  +OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i iso-8859-1 to binary");
  +    set S1, "abc"
  +    find_charset I0, "binary"
  +    trans_charset S1, I0
  +    print S1
  +    print "\n"
  +    charset I0, S1
  +    charsetname S2, I0
  +    print S2
  +    print "\n"
  +    end
  +CODE
  +abc
  +binary
  +OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i ascii to binary");
  +    set S0, ascii:"abc"
  +    find_charset I0, "binary"
  +    trans_charset S1, S0, I0
  +    print S1
  +    print "\n"
  +    charset I0, S1
  +    charsetname S2, I0
  +    print S2
  +    print "\n"
  +    end
  +CODE
  +abc
  +binary
  +OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i ascii to binary");
  +    set S1, ascii:"abc"
  +    find_charset I0, "binary"
  +    trans_charset S1, I0
  +    print S1
  +    print "\n"
  +    charset I0, S1
  +    charsetname S2, I0
  +    print S2
  +    print "\n"
  +    end
  +CODE
  +abc
  +binary
  +OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "trans_charset_s_s_i ascii to iso-8859-1");
  +    set S0, ascii:"abc"
  +    find_charset I0, "iso-8859-1"
  +    trans_charset S1, S0, I0
  +    print S1
  +    print "\n"
  +    charset I0, S1
  +    charsetname S2, I0
  +    print S2
  +    print "\n"
  +    end
  +CODE
  +abc
  +iso-8859-1
  +OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "trans_charset_s_i ascii to iso-8859-1");
  +    set S1, ascii:"abc"
  +    find_charset I0, "iso-8859-1"
  +    trans_charset S1, I0
  +    print S1
  +    print "\n"
  +    charset I0, S1
  +    charsetname S2, I0
  +    print S2
  +    print "\n"
  +    end
  +CODE
  +abc
  +iso-8859-1
  +OUTPUT
  +
  
  
  

Reply via email to