cvsuser     05/03/02 02:43:16

  Modified:    charset  ascii.c
               encodings fixed_8.c utf8.c
               include/parrot encoding.h string.h
               src      string.c
  Log:
  Strings. Finally. 12 - new iter_init encoding function
  
  * adapt and generalize the iterator stuff from utf8
  * use it in ascii charset
  
  Revision  Changes    Path
  1.17      +13 -5     parrot/charset/ascii.c
  
  Index: ascii.c
  ===================================================================
  RCS file: /cvs/public/parrot/charset/ascii.c,v
  retrieving revision 1.16
  retrieving revision 1.17
  diff -u -r1.16 -r1.17
  --- ascii.c   2 Mar 2005 09:03:25 -0000       1.16
  +++ ascii.c   2 Mar 2005 10:43:13 -0000       1.17
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: ascii.c,v 1.16 2005/03/02 09:03:25 leo Exp $
  +$Id: ascii.c,v 1.17 2005/03/02 10:43:13 leo Exp $
   
   =head1 NAME
   
  @@ -107,13 +107,16 @@
   from_charset(Interp *interpreter, STRING *src, STRING *dest)
   {
       UINTVAL offs, c;
  +    String_iter iter;
  +
       if (dest) {
           Parrot_reallocate_string(interpreter, dest, src->strlen);
           dest->bufused = src->strlen;
           dest->strlen  = src->strlen;
       }
  +    ENCODING_ITER_INIT(interpreter, src, &iter);
       for (offs = 0; offs < src->strlen; ++offs) {
  -        c = ENCODING_GET_CODEPOINT(interpreter, src, offs);
  +        c = iter.get_and_advance(interpreter, &iter);
           if (c >= 0x80) {
               EXCEPTION(LOSSY_CONVERSION, "lossy conversion to ascii");
           }
  @@ -142,7 +145,8 @@
   }
   
   STRING *
  -ascii_to_charset(Interp *interpreter, STRING *src, CHARSET *new_charset, STRING *dest)
  +ascii_to_charset(Interp *interpreter, STRING *src,
  +        CHARSET *new_charset, STRING *dest)
   {
       charset_converter_t conversion_func;
   
  @@ -265,6 +269,7 @@
   {
       INTVAL retval;
       UINTVAL offs, l_len, r_len, min_len;
  +    String_iter iter;
   
       l_len = lhs->strlen;
       r_len = rhs->strlen;
  @@ -276,9 +281,10 @@
       }
       else {
           UINTVAL cl, cr;
  +        ENCODING_ITER_INIT(interpreter, rhs, &iter);
           for (offs = 0; offs < min_len; ++offs) {
               cl = ENCODING_GET_BYTE(interpreter, lhs, offs);
  -            cr = ENCODING_GET_CODEPOINT(interpreter, rhs, offs);
  +            cr = iter.get_and_advance(interpreter, &iter);
               retval = cl - cr;
               if (retval)
                   break;
  @@ -336,9 +342,11 @@
   validate(Interp *interpreter, STRING *src)
   {
       UINTVAL codepoint, offset;
  +    String_iter iter;
   
  +    ENCODING_ITER_INIT(interpreter, src, &iter);
       for (offset = 0; offset < string_length(interpreter, src); ++offset) {
  -        codepoint = ENCODING_GET_CODEPOINT(interpreter, src, offset);
  +        codepoint = iter.get_and_advance(interpreter, &iter);
           if (codepoint >= 0x80)
               return 0;
       }
  
  
  
  1.10      +41 -2     parrot/encodings/fixed_8.c
  
  Index: fixed_8.c
  ===================================================================
  RCS file: /cvs/public/parrot/encodings/fixed_8.c,v
  retrieving revision 1.9
  retrieving revision 1.10
  diff -u -r1.9 -r1.10
  --- fixed_8.c 2 Mar 2005 09:03:28 -0000       1.9
  +++ fixed_8.c 2 Mar 2005 10:43:14 -0000       1.10
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2004 The Perl Foundation.  All Rights Reserved.
  -$Id: fixed_8.c,v 1.9 2005/03/02 09:03:28 leo Exp $
  +$Id: fixed_8.c,v 1.10 2005/03/02 10:43:14 leo Exp $
   
   =head1 NAME
   
  @@ -164,6 +164,43 @@
       return source_string->bufused;
   }
   
  +/*
  + * iterator functions
  + */
  +
  +static UINTVAL
  +fixed8_get_next(Interp *interpreter, String_iter *iter)
  +{
  +    UINTVAL c = get_byte(interpreter, iter->str, iter->charpos++);
  +    iter->bytepos++;
  +    return c;
  +}
  +
  +static void
  +fixed8_set_next(Interp *interpreter, String_iter *iter, UINTVAL c)
  +{
  +    set_byte(interpreter, iter->str, iter->charpos++, c);
  +    iter->bytepos++;
  +}
  +
  +static void
  +fixed8_set_position(Interp *interpreter, String_iter *iter, UINTVAL pos)
  +{
  +    iter->bytepos = iter->charpos = pos;
  +    assert(pos < PObj_buflen(iter->str));
  +}
  +
  +
  +static void
  +iter_init(Interp *interpreter, String *src, String_iter *iter)
  +{
  +    iter->str = src;
  +    iter->bytepos = iter->charpos = 0;
  +    iter->get_and_advance = fixed8_get_next;
  +    iter->set_and_advance = fixed8_set_next;
  +    iter->set_position =    fixed8_set_position;
  +}
  +
   ENCODING *
   Parrot_encoding_fixed_8_init(Interp *interpreter)
   {
  @@ -186,7 +223,9 @@
        set_bytes,
        become_encoding,
        codepoints,
  -     bytes
  +     bytes,
  +     iter_init
  +
       };
       memcpy(return_encoding, &base_encoding, sizeof(ENCODING));
       Parrot_register_encoding(interpreter, "fixed_8", return_encoding);
  
  
  
  1.22      +36 -8     parrot/encodings/utf8.c
  
  Index: utf8.c
  ===================================================================
  RCS file: /cvs/public/parrot/encodings/utf8.c,v
  retrieving revision 1.21
  retrieving revision 1.22
  diff -u -r1.21 -r1.22
  --- utf8.c    2 Mar 2005 09:03:28 -0000       1.21
  +++ utf8.c    2 Mar 2005 10:43:14 -0000       1.22
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -$Id: utf8.c,v 1.21 2005/03/02 09:03:28 leo Exp $
  +$Id: utf8.c,v 1.22 2005/03/02 10:43:14 leo Exp $
   
   =head1 NAME
   
  @@ -215,14 +215,18 @@
   
   =head2 Iterator Functions
   
  -String iteration is currently only used in C<hash_string_equal()>.
  -
   =over 4
   
   =item C<static UINTVAL
  -utf8_decode_and_advance(struct string_iterator_t *i)>
  +utf8_decode_and_advance(Interp *, String_iter *i)>
  +
  +The UTF-8 implementation of the string iterator's C<get_and_advance>
  +function.
  +
  +=item C<static void
  +utf8_encode_and_advance(Interp *, String_iter *i, UINTVAL c)>
   
  -The UTF-8 implementation of the string iterator's C<decode_and_advance>
  +The UTF-8 implementation of the string iterator's C<set_and_advance>
   function.
   
   =cut
  @@ -230,7 +234,7 @@
   */
   
   static UINTVAL
  -utf8_decode_and_advance(struct string_iterator_t *i)
  +utf8_decode_and_advance(Interp *interpreter, String_iter *i)
   {
       const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos);
       UINTVAL c = *u8ptr;
  @@ -263,6 +267,19 @@
       return c;
   }
   
  +static void
  +utf8_encode_and_advance(Interp *interpreter, String_iter *i, UINTVAL c)
  +{
  +    const STRING *s = i->str;
  +    unsigned char *new_pos, *pos;
  +
  +    assert(i->bytepos < PObj_buflen(s) - 4);
  +    pos = (unsigned char *)s->strstart + i->bytepos;
  +    new_pos = utf8_encode(pos, c);
  +    i->bytepos += (new_pos - pos);
  +    i->charpos++;
  +}
  +
   /*
   
   =item C<func>
  @@ -276,7 +293,7 @@
   
   /* XXX Should use quickest direction */
   static void
  -utf8_set_position(struct string_iterator_t *i, Parrot_Int pos)
  +utf8_set_position(Interp *interpreter, String_iter *i, UINTVAL pos)
   {
       const utf8_t *u8ptr = (utf8_t *)i->str->strstart;
   
  @@ -426,6 +443,16 @@
       return src->bufused;
   }
   
  +static void
  +iter_init(Interp *interpreter, String *src, String_iter *iter)
  +{
  +    iter->str = src;
  +    iter->bytepos = iter->charpos = 0;
  +    iter->get_and_advance = utf8_decode_and_advance;
  +    iter->set_and_advance = utf8_encode_and_advance;
  +    iter->set_position =    utf8_set_position;
  +}
  +
   ENCODING *
   Parrot_encoding_utf8_init(Interp *interpreter)
   {
  @@ -448,7 +475,8 @@
        set_bytes,
        become_encoding,
        codepoints,
  -     bytes
  +     bytes,
  +        iter_init
       };
       memcpy(return_encoding, &base_encoding, sizeof(ENCODING));
       Parrot_register_encoding(interpreter, "utf8", return_encoding);
  
  
  
  1.34      +11 -1     parrot/include/parrot/encoding.h
  
  Index: encoding.h
  ===================================================================
  RCS file: /cvs/public/parrot/include/parrot/encoding.h,v
  retrieving revision 1.33
  retrieving revision 1.34
  diff -u -r1.33 -r1.34
  --- encoding.h        2 Mar 2005 09:03:29 -0000       1.33
  +++ encoding.h        2 Mar 2005 10:43:15 -0000       1.34
  @@ -1,7 +1,7 @@
   /* encoding.h
    *  Copyright: 2004 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: encoding.h,v 1.33 2005/03/02 09:03:29 leo Exp $
  + *     $Id: encoding.h,v 1.34 2005/03/02 10:43:15 leo Exp $
    *  Overview:
    *     This is the header for the generic encoding functions
    *  Data Structure and Algorithms:
  @@ -31,6 +31,13 @@
   typedef UINTVAL (*encoding_codepoints_t)(Interp*, STRING *src);
   typedef UINTVAL (*encoding_bytes_t)(Interp*, STRING *src);
   
  +/* iterator support */
  +
  +struct string_iterator_t;       /* s. parrot/string.h */
  +
  +typedef void    (*encoding_iter_init_t)(Interp *, STRING *src,
  +        struct string_iterator_t *);
  +
   struct _encoding {
       const char *name;
       UINTVAL max_bytes_per_codepoint;
  @@ -49,6 +56,7 @@
       encoding_become_encoding_t  become_encoding;
       encoding_codepoints_t  codepoints;
       encoding_bytes_t  bytes;
  +    encoding_iter_init_t     iter_init;
   };
   
   typedef struct _encoding ENCODING;
  @@ -104,6 +112,8 @@
       ((ENCODING *)src->encoding)->codepoints(i, src)
   #define ENCODING_BYTES(i, src) \
       ((ENCODING *)src->encoding)->bytes(i, src)
  +#define ENCODING_ITER_INIT(i, src, iter) \
  +    ((ENCODING *)src->encoding)->iter_init(i, src, iter)
   
   #endif /* PARROT_ENCODING_H_GUARD */
   /*
  
  
  
  1.61      +7 -4      parrot/include/parrot/string.h
  
  Index: string.h
  ===================================================================
  RCS file: /cvs/public/parrot/include/parrot/string.h,v
  retrieving revision 1.60
  retrieving revision 1.61
  diff -u -r1.60 -r1.61
  --- string.h  11 Jun 2004 13:48:56 -0000      1.60
  +++ string.h  2 Mar 2005 10:43:15 -0000       1.61
  @@ -1,7 +1,7 @@
   /* string.h
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: string.h,v 1.60 2004/06/11 13:48:56 nicholas Exp $
  + *     $Id: string.h,v 1.61 2005/03/02 10:43:15 leo Exp $
    *  Overview:
    *     This is the api header for the string subsystem
    *  Data Structure and Algorithms:
  @@ -38,9 +38,12 @@
       const String *str;
       UINTVAL bytepos;
       UINTVAL charpos;
  -    UINTVAL (*decode_and_advance)(struct string_iterator_t *i);
  -    void (*set_position)(struct string_iterator_t *i, INTVAL pos);
  -} string_iterator;
  +    UINTVAL (*get_and_advance)(Interp *, struct string_iterator_t *i);
  +    void (*set_and_advance)(Interp *, struct string_iterator_t *i, UINTVAL c);
  +    void (*set_position)(Interp *, struct string_iterator_t *i, UINTVAL pos);
  +} String_iter;
  +
  +void string_iter_init(Interp *, const String *str, String_iter *);
   
   /* stringinfo parameters */
   
  
  
  
  1.241     +1 -3      parrot/src/string.c
  
  Index: string.c
  ===================================================================
  RCS file: /cvs/public/parrot/src/string.c,v
  retrieving revision 1.240
  retrieving revision 1.241
  diff -u -r1.240 -r1.241
  --- string.c  2 Mar 2005 09:03:30 -0000       1.240
  +++ string.c  2 Mar 2005 10:43:16 -0000       1.241
  @@ -1,6 +1,6 @@
   /*
   Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
  -$Id: string.c,v 1.240 2005/03/02 09:03:30 leo Exp $
  +$Id: string.c,v 1.241 2005/03/02 10:43:16 leo Exp $
   
   =head1 NAME
   
  @@ -900,9 +900,7 @@
   INTVAL
   string_compute_strlen(Interp *interpreter, STRING *s)
   {
  -    /* taking advantage of int value of the enum */
       s->strlen = CHARSET_CODEPOINTS(interpreter, s);
  -
       return s->strlen;
   }
   
  
  
  

Reply via email to