cvsuser     05/02/28 05:35:47

  Modified:    include/parrot charset.h
               ops      ops.num string.ops
               src      charset.c string.c
               t/op     string_cs.t
  Log:
  Strings. Finally. 2 - some charset opcodes
  * opcodes and interface functions for:
      charset, charsetname, find_charset
  * refine charset registration
  
  Revision  Changes    Path
  1.7       +6 -1      parrot/include/parrot/charset.h
  
  Index: charset.h
  ===================================================================
  RCS file: /cvs/public/parrot/include/parrot/charset.h,v
  retrieving revision 1.6
  retrieving revision 1.7
  diff -u -r1.6 -r1.7
  --- charset.h 28 Feb 2005 10:41:19 -0000      1.6
  +++ charset.h 28 Feb 2005 13:35:43 -0000      1.7
  @@ -1,7 +1,7 @@
   /* charset.h
    *  Copyright: 2004 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: charset.h,v 1.6 2005/02/28 10:41:19 leo Exp $
  + *     $Id: charset.h,v 1.7 2005/02/28 13:35:43 leo Exp $
    *  Overview:
    *     This is the header for the 8-bit fixed-width encoding
    *  Data Structure and Algorithms:
  @@ -80,6 +80,11 @@
   typedef INTVAL (*charset_converter_t)(Interp *, CHARSET *lhs, CHARSET *rhs);
   charset_converter_t Parrot_find_charset_converter(Interp *, CHARSET *lhs, 
CHARSET *rhs);
   
  +void Parrot_deinit_charsets(Interp *);
  +INTVAL Parrot_charset_number(Interp *, STRING *charsetname);
  +STRING* Parrot_charset_name(Interp *, INTVAL);
  +INTVAL Parrot_charset_number_of_str(Interp *, STRING *src);
  +
   struct _charset {
       const char *name;
       charset_get_graphemes_t get_graphemes;
  
  
  
  1.57      +6 -0      parrot/ops/ops.num
  
  Index: ops.num
  ===================================================================
  RCS file: /cvs/public/parrot/ops/ops.num,v
  retrieving revision 1.56
  retrieving revision 1.57
  diff -u -r1.56 -r1.57
  --- ops.num   14 Feb 2005 10:57:23 -0000      1.56
  +++ ops.num   28 Feb 2005 13:35:45 -0000      1.57
  @@ -1372,3 +1372,9 @@
   listen_i_p_i                   1342
   listen_i_p_ic                  1343
   accept_p_p                     1344
  +charset_i_s                    1345
  +charset_i_sc                   1346
  +charsetname_s_i                1347
  +charsetname_s_ic               1348
  +find_charset_i_s               1349
  +find_charset_i_sc              1350
  
  
  
  1.32      +34 -0     parrot/ops/string.ops
  
  Index: string.ops
  ===================================================================
  RCS file: /cvs/public/parrot/ops/string.ops,v
  retrieving revision 1.31
  retrieving revision 1.32
  diff -u -r1.31 -r1.32
  --- string.ops        5 Jan 2005 17:03:03 -0000       1.31
  +++ string.ops        28 Feb 2005 13:35:45 -0000      1.32
  @@ -621,6 +621,40 @@
     goto NEXT();
   }
   
  +=item B<charset>(out INT, in STR)
  +
  +Return the charset number of string $2.
  +
  +=item B<charsetname>(out STR, in INT)
  +
  +Return the name of charset numbered $2.
  +
  +=item B<find_charset>(out INT, in STR)
  +
  +Return the charset number of the charset named $2. If the charset doesn't
  +exit, throw an exception.
  +
  +=cut
  +
  +op charset(out INT, in STR) :base_core {
  +  $1 = Parrot_charset_number_of_str(interpreter, $2);
  +  goto NEXT();
  +}
  +
  +op charsetname(out STR, in INT) :base_core {
  +  $1 = string_copy(interpreter, Parrot_charset_name(interpreter, $2));
  +  goto NEXT();
  +}
  +
  +op find_charset(out INT, in STR) :base_core {
  +  INTVAL n = Parrot_charset_number(interpreter, $2);
  +  if (n < 0)
  +    real_exception(interpreter, NULL, 1,
  +     "charset '%Ss' not found", $2);
  +  $1 = n;
  +  goto NEXT();
  +}
  +
   =back
   
   =head1 COPYRIGHT
  
  
  
  1.6       +102 -6    parrot/src/charset.c
  
  Index: charset.c
  ===================================================================
  RCS file: /cvs/public/parrot/src/charset.c,v
  retrieving revision 1.5
  retrieving revision 1.6
  diff -u -r1.5 -r1.6
  --- charset.c 28 Feb 2005 10:41:20 -0000      1.5
  +++ charset.c 28 Feb 2005 13:35:46 -0000      1.6
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: charset.c,v 1.5 2005/02/28 10:41:20 leo Exp $
  +$Id: charset.c,v 1.6 2005/02/28 13:35:46 leo Exp $
   
   =head1 NAME
   
  @@ -21,13 +21,34 @@
   CHARSET *Parrot_unicode_charset_ptr;
   CHARSET *Parrot_ascii_charset_ptr;
   
  +/*
  + * all registered charsets are collected in one global structure
  + */
  +
  +typedef struct {
  +    CHARSET *charset;
  +    STRING  *name;
  +} One_charset;
  +
  +typedef struct {
  +    int n_charsets;
  +    One_charset *set;
  +} All_charsets;
  +
  +static All_charsets *all_charsets;
  +
  +
   CHARSET *
   Parrot_new_charset(Interp *interpreter)
   {
  -
       return mem_sys_allocate(sizeof(CHARSET));
   }
   
  +void
  +Parrot_deinit_charsets(Interp *interpreter)
  +{
  +}
  +
   CHARSET *
   Parrot_find_charset(Interp *interpreter, const char *charsetname)
   {
  @@ -50,28 +71,103 @@
       return NULL;
   }
   
  +/*
  +
  +=item C<INTVAL Parrot_charset_number(Interp *, STRING *charsetname)>
  +
  +Return the number of the charset or -1 if not found.
  +
  +=item C<INTVAL Parrot_charset_number_of_str(Interp *, const STRING *src)>
  +
  +Return the number of the charset of the given string or -1 if not found.
  +
  +*/
  +
  +INTVAL
  +Parrot_charset_number(Interp *interpreter, STRING *charsetname)
  +{
  +    int i, n;
  +
  +    n = all_charsets->n_charsets;
  +    for (i = 0; i < n; ++i) {
  +        if (!string_equal(interpreter, all_charsets->set[i].name, 
charsetname))
  +            return i;
  +    }
  +    return -1;
  +}
  +
  +INTVAL
  +Parrot_charset_number_of_str(Interp *interpreter, STRING *src)
  +{
  +    int i, n;
  +
  +    n = all_charsets->n_charsets;
  +    for (i = 0; i < n; ++i) {
  +        if (src->charset == all_charsets->set[i].charset)
  +            return i;
  +    }
  +    return -1;
  +}
  +
  +STRING*
  +Parrot_charset_name(Interp *interpreter, INTVAL number_of_charset)
  +{
  +    if (number_of_charset >= all_charsets->n_charsets)
  +        return NULL;
  +    return all_charsets->set[number_of_charset].name;
  +}
  +
  +static INTVAL
  +register_charset(Interp *interpreter, const char *charsetname,
  +        CHARSET *charset)
  +{
  +    int i, n;
  +
  +    n = all_charsets->n_charsets;
  +    for (i = 0; i < n; ++i) {
  +        if (!strcmp(all_charsets->set[i].charset->name, charsetname))
  +            return 0;
  +    }
  +    if (!n)
  +        all_charsets->set = mem_sys_allocate(sizeof(One_charset));
  +    else
  +        all_charsets->set = mem_sys_realloc(all_charsets->set, (n + 1) *
  +                sizeof(One_charset));
  +    all_charsets->n_charsets++;
  +    all_charsets->set[n].charset = charset;
  +    all_charsets->set[n].name = string_from_cstring(interpreter,
  +            charsetname, 0);
  +
  +    return 1;
  +}
  +
   INTVAL
   Parrot_register_charset(Interp *interpreter, const char *charsetname,
           CHARSET *charset)
   {
  +    if (!all_charsets) {
  +        all_charsets = mem_sys_allocate(sizeof(All_charsets));
  +        all_charsets->n_charsets = 0;
  +        all_charsets->set = NULL;
  +    }
       if (!strcmp("binary", charsetname)) {
           Parrot_binary_charset_ptr = charset;
  -        return 1;
  +        return register_charset(interpreter, charsetname, charset);
       }
       if (!strcmp("iso-8859-1", charsetname)) {
           Parrot_iso_8859_1_charset_ptr = charset;
           if (!Parrot_default_charset_ptr) {
               Parrot_default_charset_ptr = charset;
           }
  -        return 1;
  +        return register_charset(interpreter, charsetname, charset);
       }
       if (!strcmp("unicode", charsetname)) {
           Parrot_unicode_charset_ptr = charset;
  -        return 1;
  +        return register_charset(interpreter, charsetname, charset);
       }
       if (!strcmp("ascii", charsetname)) {
           Parrot_ascii_charset_ptr = charset;
  -        return 1;
  +        return register_charset(interpreter, charsetname, charset);
       }
       return 0;
   }
  
  
  
  1.235     +9 -3      parrot/src/string.c
  
  Index: string.c
  ===================================================================
  RCS file: /cvs/public/parrot/src/string.c,v
  retrieving revision 1.234
  retrieving revision 1.235
  diff -u -r1.234 -r1.235
  --- string.c  28 Feb 2005 10:41:20 -0000      1.234
  +++ string.c  28 Feb 2005 13:35:46 -0000      1.235
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -$Id: string.c,v 1.234 2005/02/28 10:41:20 leo Exp $
  +$Id: string.c,v 1.235 2005/02/28 13:35:46 leo Exp $
   
   =head1 NAME
   
  @@ -288,11 +288,16 @@
       } __ptr_u;
   
       if (!interpreter->parent_interpreter) {
  -        /* Load in the basic encodings and charsets */
  +        /* Load in the basic encodings and charsets
  +         *
  +         * the order is crucial here:
  +         * 1) default encoding = fixed_8
  +         * 2) default charset  = iso-8859-1
  +         */
           Parrot_encoding_fixed_8_init(interpreter);
  +        Parrot_charset_iso_8859_1_init(interpreter);
           Parrot_charset_binary_init(interpreter);
           Parrot_charset_ascii_init(interpreter);
  -        Parrot_charset_iso_8859_1_init(interpreter);
   
           /* DEFAULT_ICU_DATA_DIR is configured at build time, or it may be
              set through the $PARROT_ICU_DATA_DIR environment variable. Need
  @@ -354,6 +359,7 @@
   {
       mem_sys_free(interpreter->const_cstring_table);
       interpreter->const_cstring_table = NULL;
  +    Parrot_deinit_charsets(interpreter);
   }
   
   /*
  
  
  
  1.2       +33 -2     parrot/t/op/string_cs.t
  
  Index: string_cs.t
  ===================================================================
  RCS file: /cvs/public/parrot/t/op/string_cs.t,v
  retrieving revision 1.1
  retrieving revision 1.2
  diff -u -r1.1 -r1.2
  --- string_cs.t       28 Feb 2005 10:41:15 -0000      1.1
  +++ string_cs.t       28 Feb 2005 13:35:47 -0000      1.2
  @@ -1,6 +1,6 @@
   #! perl -w
   # Copyright: 2001-2004 The Perl Foundation.  All Rights Reserved.
  -# $Id: string_cs.t,v 1.1 2005/02/28 10:41:15 leo Exp $
  +# $Id: string_cs.t,v 1.2 2005/02/28 13:35:47 leo Exp $
   
   =head1 NAME
   
  @@ -16,7 +16,7 @@
   
   =cut
   
  -use Parrot::Test tests => 1;
  +use Parrot::Test tests => 4;
   use Test::More;
   
   output_is( <<'CODE', <<OUTPUT, "basic syntax" );
  @@ -33,3 +33,34 @@
   ok 3
   OUTPUT
   
  +output_is( <<'CODE', <<OUTPUT, "charset name" );
  +    set S0, ascii:"ok 1\n"
  +    charset I0, S0
  +    charsetname S1, I0
  +    print S1
  +    print "\n"
  +    end
  +CODE
  +ascii
  +OUTPUT
  +
  +output_is( <<'CODE', <<OUTPUT, "find_charset" );
  +    find_charset I0, "iso-8859-1"
  +    print "ok 1\n"
  +    find_charset I0, "ascii"
  +    print "ok 2\n"
  +    find_charset I0, "binary"
  +    print "ok 3\n"
  +    end
  +CODE
  +ok 1
  +ok 2
  +ok 3
  +OUTPUT
  +
  +output_like( <<'CODE', <<OUTPUT, "find_charset - not existing" );
  +    find_charset I0, "no_such"
  +    end
  +CODE
  +/charset 'no_such' not found/
  +OUTPUT
  
  
  

Reply via email to