cvsuser     03/11/15 03:11:28

  Modified:    encodings dbcs.c singlebyte.c utf16.c utf32.c utf8.c
               include/parrot encoding.h string.h string_funcs.h
               src      string.c
  Log:
  Add string iterator function set_position; currently used in string_chopn
  
  Revision  Changes    Path
  1.3       +18 -2     parrot/encodings/dbcs.c
  
  Index: dbcs.c
  ===================================================================
  RCS file: /cvs/public/parrot/encodings/dbcs.c,v
  retrieving revision 1.2
  retrieving revision 1.3
  diff -u -w -r1.2 -r1.3
  --- dbcs.c    14 Nov 2003 20:26:38 -0000      1.2
  +++ dbcs.c    15 Nov 2003 11:11:20 -0000      1.3
  @@ -1,7 +1,7 @@
   /* dbcs.c
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: dbcs.c,v 1.2 2003/11/14 20:26:38 petergibbs Exp $
  + *     $Id: dbcs.c,v 1.3 2003/11/15 11:11:20 petergibbs Exp $
    *  Overview:
    *     This defines the DBCS encoding routines.
    *  Data Structure and Algorithms:
  @@ -106,6 +106,21 @@
       }
   }
   
  +static void
  +dbcs_set_position(struct string_iterator_t *i, Parrot_Int pos)
  +{
  +    const byte_t *bptr = (char *)i->str->strstart;
  +
  +    i->charpos = pos;
  +    while (pos--) {
  +        if (*bptr > 127)
  +            bptr += 2;
  +        else
  +            bptr++;
  +    }
  +    i->bytepos = (const char *)bptr - (const char *)i->str->strstart;
  +}
  +
   const ENCODING dbcs_encoding = {
       enum_encoding_dbcs,
       "dbcs",
  @@ -115,7 +130,8 @@
       dbcs_encode,
       dbcs_skip_forward,
       dbcs_skip_backward,
  -    dbcs_decode_and_advance
  +    dbcs_decode_and_advance,
  +    dbcs_set_position
   };
   
   /*
  
  
  
  1.18      +9 -2      parrot/encodings/singlebyte.c
  
  Index: singlebyte.c
  ===================================================================
  RCS file: /cvs/public/parrot/encodings/singlebyte.c,v
  retrieving revision 1.17
  retrieving revision 1.18
  diff -u -w -r1.17 -r1.18
  --- singlebyte.c      14 Nov 2003 20:26:40 -0000      1.17
  +++ singlebyte.c      15 Nov 2003 11:11:20 -0000      1.18
  @@ -1,7 +1,7 @@
   /* singlebyte.c
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: singlebyte.c,v 1.17 2003/11/14 20:26:40 petergibbs Exp $
  + *     $Id: singlebyte.c,v 1.18 2003/11/15 11:11:20 petergibbs Exp $
    *  Overview:
    *     This defines the single byte encoding routines.
    *  Data Structure and Algorithms:
  @@ -68,6 +68,12 @@
       return *ptr;
   }
   
  +static void
  +singlebyte_set_position(struct string_iterator_t *i, Parrot_Int pos)
  +{
  +    i->bytepos = i->charpos = pos;
  +}
  +
   const ENCODING singlebyte_encoding = {
       enum_encoding_singlebyte,
       "singlebyte",
  @@ -77,7 +83,8 @@
       singlebyte_encode,
       singlebyte_skip_forward,
       singlebyte_skip_backward,
  -    singlebyte_decode_and_advance
  +    singlebyte_decode_and_advance,
  +    singlebyte_set_position
   };
   
   /*
  
  
  
  1.15      +29 -3     parrot/encodings/utf16.c
  
  Index: utf16.c
  ===================================================================
  RCS file: /cvs/public/parrot/encodings/utf16.c,v
  retrieving revision 1.14
  retrieving revision 1.15
  diff -u -w -r1.14 -r1.15
  --- utf16.c   14 Nov 2003 20:26:40 -0000      1.14
  +++ utf16.c   15 Nov 2003 11:11:20 -0000      1.15
  @@ -1,7 +1,7 @@
   /* utf16.c
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: utf16.c,v 1.14 2003/11/14 20:26:40 petergibbs Exp $
  + *     $Id: utf16.c,v 1.15 2003/11/15 11:11:20 petergibbs Exp $
    *  Overview:
    *     This defines the UTF-16 encoding routines.
    *  Data Structure and Algorithms:
  @@ -134,7 +134,7 @@
   static UINTVAL
   utf16_decode_and_advance(struct string_iterator_t *i)
   {
  -    const utf16_t *u16ptr = (char *)i->str->strstart + i->bytepos;
  +    const utf16_t *u16ptr = (utf16_t *)((char *)i->str->strstart + i->bytepos);
       UINTVAL c = *u16ptr++;
   
       if (UNICODE_IS_HIGH_SURROGATE(c)) {
  @@ -159,6 +159,31 @@
       return c;
   }
   
  +/* XXX Should use quickest direction */
  +static void
  +utf16_set_position(struct string_iterator_t *i, Parrot_Int pos)
  +{
  +    const utf16_t *u16ptr = (utf16_t *)i->str->strstart;
  +
  +    i->charpos = pos;
  +    while (pos-- > 0) {
  +        if (UNICODE_IS_HIGH_SURROGATE(*u16ptr)) {
  +            u16ptr++;
  +
  +            if (!UNICODE_IS_LOW_SURROGATE(*u16ptr)) {
  +                internal_exception(MALFORMED_UTF16,
  +                                   "Malformed UTF-16 surrogate\n");
  +            }
  +        }
  +        else if (UNICODE_IS_LOW_SURROGATE(*u16ptr)) {
  +            internal_exception(MALFORMED_UTF16,
  +                               "Malformed UTF-16 surrogate\n");
  +        }
  +        u16ptr++;
  +    }
  +    i->bytepos = (const char *)u16ptr - (const char *)i->str->strstart;
  +}
  +
   const ENCODING utf16_encoding = {
       enum_encoding_utf16,
       "utf16",
  @@ -168,7 +193,8 @@
       utf16_encode,
       utf16_skip_forward,
       utf16_skip_backward,
  -    utf16_decode_and_advance
  +    utf16_decode_and_advance,
  +    utf16_set_position
   };
   
   /*
  
  
  
  1.13      +10 -2     parrot/encodings/utf32.c
  
  Index: utf32.c
  ===================================================================
  RCS file: /cvs/public/parrot/encodings/utf32.c,v
  retrieving revision 1.12
  retrieving revision 1.13
  diff -u -w -r1.12 -r1.13
  --- utf32.c   14 Nov 2003 20:26:40 -0000      1.12
  +++ utf32.c   15 Nov 2003 11:11:20 -0000      1.13
  @@ -1,7 +1,7 @@
   /* utf32.c
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: utf32.c,v 1.12 2003/11/14 20:26:40 petergibbs Exp $
  + *     $Id: utf32.c,v 1.13 2003/11/15 11:11:20 petergibbs Exp $
    *  Overview:
    *     This defines the UTF-32 encoding routines.
    *  Data Structure and Algorithms:
  @@ -73,6 +73,13 @@
       return *u32ptr;
   }
   
  +static void
  +utf32_set_position(struct string_iterator_t *i, Parrot_Int pos)
  +{
  +    i->charpos = pos;
  +    i->bytepos = pos * 4;
  +}
  +
   const ENCODING utf32_encoding = {
       enum_encoding_utf32,
       "utf32",
  @@ -82,7 +89,8 @@
       utf32_encode,
       utf32_skip_forward,
       utf32_skip_backward,
  -    utf32_decode_and_advance
  +    utf32_decode_and_advance,
  +    utf32_set_position
   };
   
   /*
  
  
  
  1.16      +17 -5     parrot/encodings/utf8.c
  
  Index: utf8.c
  ===================================================================
  RCS file: /cvs/public/parrot/encodings/utf8.c,v
  retrieving revision 1.15
  retrieving revision 1.16
  diff -u -w -r1.15 -r1.16
  --- utf8.c    14 Nov 2003 20:26:40 -0000      1.15
  +++ utf8.c    15 Nov 2003 11:11:20 -0000      1.16
  @@ -1,7 +1,7 @@
   /* utf8.c
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: utf8.c,v 1.15 2003/11/14 20:26:40 petergibbs Exp $
  + *     $Id: utf8.c,v 1.16 2003/11/15 11:11:20 petergibbs Exp $
    *  Overview:
    *     This defines the UTF-8 encoding routines.
    *  Data Structure and Algorithms:
  @@ -136,16 +136,14 @@
   static UINTVAL
   utf8_decode_and_advance(struct string_iterator_t *i)
   {
  -    const utf8_t *u8ptr = (char *)i->str->strstart + i->bytepos;
  +    const utf8_t *u8ptr = (utf8_t *)((char *)i->str->strstart + i->bytepos);
       UINTVAL c = *u8ptr;
   
       if (UTF8_IS_START(c)) {
           UINTVAL len = UTF8SKIP(u8ptr);
  -        UINTVAL count;
   
           c &= UTF8_START_MASK(len);
           i->bytepos += len;
  -//      for (count = 1; count < len; count++) {
           for (len--; len; len--) {
               u8ptr++;
               if (!UTF8_IS_CONTINUATION(*u8ptr)) {
  @@ -169,6 +167,19 @@
       return c;
   }
   
  +/* XXX Should use quickest direction */
  +static void
  +utf8_set_position(struct string_iterator_t *i, Parrot_Int pos)
  +{
  +    const utf8_t *u8ptr = (char *)i->str->strstart;
  +
  +    i->charpos = pos;
  +    while (pos-- > 0) {
  +        u8ptr += UTF8SKIP(u8ptr);
  +    }
  +    i->bytepos = (const char *)u8ptr - (const char *)i->str->strstart;
  +}
  +
   const ENCODING utf8_encoding = {
       enum_encoding_utf8,
       "utf8",
  @@ -178,7 +189,8 @@
       utf8_encode,
       utf8_skip_forward,
       utf8_skip_backward,
  -    utf8_decode_and_advance
  +    utf8_decode_and_advance,
  +    utf8_set_position
   };
   
   /*
  
  
  
  1.24      +2 -1      parrot/include/parrot/encoding.h
  
  Index: encoding.h
  ===================================================================
  RCS file: /cvs/public/parrot/include/parrot/encoding.h,v
  retrieving revision 1.23
  retrieving revision 1.24
  diff -u -w -r1.23 -r1.24
  --- encoding.h        14 Nov 2003 20:26:55 -0000      1.23
  +++ encoding.h        15 Nov 2003 11:11:24 -0000      1.24
  @@ -1,7 +1,7 @@
   /* encoding.h
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: encoding.h,v 1.23 2003/11/14 20:26:55 petergibbs Exp $
  + *     $Id: encoding.h,v 1.24 2003/11/15 11:11:24 petergibbs Exp $
    *  Overview:
    *     This is the api header for the string encoding subsystem
    *  Data Structure and Algorithms:
  @@ -37,6 +37,7 @@
       const void *(*skip_forward) (const void *ptr, Parrot_UInt n);
       const void *(*skip_backward) (const void *ptr, Parrot_UInt n);
       Parrot_UInt(*decode_and_advance) (struct string_iterator_t *i);
  +    void (*set_position)(struct string_iterator_t *i, Parrot_Int pos);
   };
   
   typedef struct parrot_encoding_t* Parrot_Encoding;
  
  
  
  1.56      +6 -5      parrot/include/parrot/string.h
  
  Index: string.h
  ===================================================================
  RCS file: /cvs/public/parrot/include/parrot/string.h,v
  retrieving revision 1.55
  retrieving revision 1.56
  diff -u -w -r1.55 -r1.56
  --- string.h  14 Nov 2003 20:26:57 -0000      1.55
  +++ string.h  15 Nov 2003 11:11:24 -0000      1.56
  @@ -1,7 +1,7 @@
   /* string.h
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: string.h,v 1.55 2003/11/14 20:26:57 petergibbs Exp $
  + *     $Id: string.h,v 1.56 2003/11/15 11:11:24 petergibbs Exp $
    *  Overview:
    *     This is the api header for the string subsystem
    *  Data Structure and Algorithms:
  @@ -41,6 +41,7 @@
     UINTVAL bytepos;
     UINTVAL charpos;
     UINTVAL (*decode_and_advance)(struct string_iterator_t *i);
  +    void (*set_position)(struct string_iterator_t *i, INTVAL pos);
   } string_iterator;
   
   /* stringinfo parameters */
  
  
  
  1.31      +2 -1      parrot/include/parrot/string_funcs.h
  
  Index: string_funcs.h
  ===================================================================
  RCS file: /cvs/public/parrot/include/parrot/string_funcs.h,v
  retrieving revision 1.30
  retrieving revision 1.31
  diff -u -w -r1.30 -r1.31
  --- string_funcs.h    14 Nov 2003 20:26:58 -0000      1.30
  +++ string_funcs.h    15 Nov 2003 11:11:24 -0000      1.31
  @@ -1,7 +1,7 @@
   /* string_funcs.h
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: string_funcs.h,v 1.30 2003/11/14 20:26:58 petergibbs Exp $
  + *     $Id: string_funcs.h,v 1.31 2003/11/15 11:11:24 petergibbs Exp $
    *  Overview:
    *     This is the api header for the string subsystem
    *  Data Structure and Algorithms:
  @@ -70,6 +70,7 @@
   STRING *string_bitwise_xor(struct Parrot_Interp *interpreter, STRING *s1,
                  STRING *s2, STRING **dest);
   void string_iterator_init(struct string_iterator_t *i, STRING *s);
  +UINTVAL string_decode_and_advance(struct string_iterator_t *i);
   
   #endif
   
  
  
  
  1.159     +17 -19    parrot/src/string.c
  
  Index: string.c
  ===================================================================
  RCS file: /cvs/public/parrot/src/string.c,v
  retrieving revision 1.158
  retrieving revision 1.159
  diff -u -w -r1.158 -r1.159
  --- string.c  14 Nov 2003 20:27:02 -0000      1.158
  +++ string.c  15 Nov 2003 11:11:27 -0000      1.159
  @@ -1,7 +1,7 @@
   /* string.c
    *  Copyright: 2001-2003 The Perl Foundation.  All Rights Reserved.
    *  CVS Info
  - *     $Id: string.c,v 1.158 2003/11/14 20:27:02 petergibbs Exp $
  + *     $Id: string.c,v 1.159 2003/11/15 11:11:27 petergibbs Exp $
    *  Overview:
    *     This is the api definitions for the string subsystem
    *  Data Structure and Algorithms:
  @@ -901,30 +901,26 @@
   STRING *
   string_chopn(STRING *s, INTVAL n)
   {
  -    const char *strstart = s->strstart;
  -    const char *bufend = strstart + s->bufused;
  -    UINTVAL true_n;
  +    UINTVAL new_length;
  +    struct string_iterator_t it;
   
  -    true_n = (UINTVAL)n;
       if (n < 0) {
  -        n = -n;
  -        true_n = (UINTVAL)n;
  -        if (true_n > s->strlen)
  -            true_n = s->strlen;
  -        bufend = s->encoding->skip_forward(strstart, true_n);
  -        s->bufused = bufend - strstart;
  -        s->strlen = true_n;
  +        new_length = -n;
  +        if (new_length > s->strlen)
  +            return s;
       }
       else {
  -        if (true_n > s->strlen)
  -            true_n = s->strlen;
  -
  -        bufend = s->encoding->skip_backward(bufend, true_n);
  -
  -        s->bufused = bufend - strstart;
  -        s->strlen = s->strlen - true_n;
  +        if (s->strlen > (UINTVAL)n)
  +            new_length = s->strlen - n;
  +        else
  +            new_length = 0;
       }
   
  +    string_iterator_init(&it, s);
  +    if (new_length > 0)
  +        it.set_position(&it, new_length);
  +    s->strlen = new_length;
  +    s->bufused = it.bytepos;
       return s;
   }
   
  @@ -1655,7 +1651,9 @@
       i->str = s;
       i->bytepos = 0;
       i->charpos = 0;
  +    /* XXX change this to a bulk copy */
       i->decode_and_advance = s->encoding->decode_and_advance;
  +    i->set_position = s->encoding->set_position;
   }
   
   /*
  
  
  

Reply via email to