Re: Large string patch

David & Lisa Jacobs Sun, 30 Dec 2001 00:44:17 -0800

Here is the string patch with the interpreter left in.  Take your pick :-)


David

Index: Makefile.in
===================================================================
RCS file: /cvs/public/parrot/Makefile.in,v
retrieving revision 1.85
diff -c -r1.85 Makefile.in
*** Makefile.in 27 Dec 2001 23:57:58 -0000 1.85
--- Makefile.in 30 Dec 2001 08:38:15 -0000
***************
*** 44,50 ****

  #XXX This target is not portable to Win32

! shared: libparrot.so libcore_prederef_0_3.so

  libparrot.so: $(O_FILES)
   $(CC) -shared $(C_LIBS) -o $@ $(O_FILES)
--- 44,50 ----

  #XXX This target is not portable to Win32

! shared: Libparrot.so libcore_prederef_0_3.so

  libparrot.so: $(O_FILES)
   $(CC) -shared $(C_LIBS) -o $@ $(O_FILES)
***************
*** 173,178 ****
--- 173,179 ----
   $(RM_F) Parrot/Jit.pm
   $(RM_F) include/parrot/jit_struct.h
   $(RM_F) libparrot.so libcore_prederef_0_3.so
+  $(RM_F) *~
   cd docs && $(MAKE) clean && cd ..
   cd classes && $(MAKE) clean && cd ..
   cd languages && $(MAKE) clean && cd ..
Index: core.ops
===================================================================
RCS file: /cvs/public/parrot/core.ops,v
retrieving revision 1.60
diff -c -r1.60 core.ops
*** core.ops 28 Dec 2001 21:20:19 -0000 1.60
--- core.ops 30 Dec 2001 08:38:18 -0000
***************
*** 104,110 ****

  op err(s) {
    char *tmp = strerror(errno);
!   STRING *s = string_make(interpreter, tmp, strlen(tmp), 0, 0, 0);
    $1 = s;
    goto NEXT();
  }
--- 104,110 ----

  op err(s) {
    char *tmp = strerror(errno);
!   STRING *s = string_make(interpreter, tmp, strlen(tmp), NULL, 0, NULL);
    $1 = s;
    goto NEXT();
  }
***************
*** 165,174 ****
   default: file = (FILE *)$2;
    }

!   string_grow($1, 65535);
    memset(($1)->bufstart, 0, 65535);
    fgets(($1)->bufstart, 65534, file);
!   ($1)->strlen = strlen(($1)->bufstart);
    goto NEXT();
  }

--- 165,174 ----
   default: file = (FILE *)$2;
    }

!   $1 = string_make(interpreter, NULL, 65535, NULL, 0, NULL);
    memset(($1)->bufstart, 0, 65535);
    fgets(($1)->bufstart, 65534, file);
!   ($1)->strlen = ($1)->bufused = strlen(($1)->bufstart);
    goto NEXT();
  }

***************
*** 359,369 ****
    INTVAL len = $3;

    string_destroy($1);
!   tmp = malloc(len + 1);
!   read($2, tmp, len);
!   s = string_make(interpreter, tmp, len, 0, 0, 0);
    $1 = s;
-   free(tmp);
    goto NEXT();
  }

--- 359,368 ----
    INTVAL len = $3;

    string_destroy($1);
!   s = string_make(interpreter, NULL, len, NULL, 0, NULL);
!   read($2, s->bufstart, len);
!   s->bufused = s->buflen;
    $1 = s;
    goto NEXT();
  }

***************
*** 860,865 ****
--- 859,868 ----

  =item B<lt>(s, sc, ic)

+ =item B<lt>(sc, s, ic)
+
+ =item B<lt>(sc, sc, ic)
+
  Branch if $1 is less than $2.

  =cut
***************
*** 885,890 ****
--- 888,900 ----
    goto NEXT();
  }

+ op lt(sc, s|sc, ic) {
+   if (string_compare(interpreter, $1, $2) < 0) {
+     goto OFFSET($3);
+   }
+   goto NEXT();
+ }
+

  ########################################

***************
*** 900,905 ****
--- 910,919 ----

  =item B<le>(s, sc, ic)

+ =item B<le>(sc, s, ic)
+
+ =item B<le>(sc, sc, ic)
+
  Branch if $1 is less than or equal to $2.

  =cut
***************
*** 925,930 ****
--- 939,951 ----
    goto NEXT();
  }

+ op le(sc, s|sc, ic) {
+   if (string_compare(interpreter, $1, $2) <= 0) {
+     goto OFFSET($3);
+   }
+   goto NEXT();
+ }
+

  ########################################

***************
*** 940,945 ****
--- 961,970 ----

  =item B<gt>(s, sc, ic)

+ =item B<gt>(sc, s, ic)
+
+ =item B<gt>(sc, sc, ic)
+
  Branch if $1 is greater than $2.

  =cut
***************
*** 965,970 ****
--- 990,1002 ----
    goto NEXT();
  }

+ op gt(sc, s|sc, ic) {
+   if (string_compare(interpreter, $1, $2) > 0) {
+     goto OFFSET($3);
+   }
+   goto NEXT();
+ }
+

  ########################################

***************
*** 980,985 ****
--- 1012,1021 ----

  =item B<ge>(s, sc, ic)

+ =item B<ge>(sc, s, ic)
+
+ =item B<ge>(sc, sc, ic)
+
  Branch if $1 is greater than or equal to $2.

  =cut
***************
*** 1005,1010 ****
--- 1041,1053 ----
    goto NEXT();
  }

+ op ge(sc, s|sc, ic) {
+   if (string_compare(interpreter, $1, $2) >= 0) {
+     goto OFFSET($3);
+   }
+   goto NEXT();
+ }
+

  ########################################

***************
*** 1035,1041 ****
  }

  op if (s, ic) {
!   if (string_bool(interpreter, $1)) {
      goto OFFSET($2);
    }
    goto NEXT();
--- 1078,1084 ----
  }

  op if (s, ic) {
!   if (string_bool($1)) {
      goto OFFSET($2);
    }
    goto NEXT();
***************
*** 1117,1123 ****
    $1 = $2 + $3;
    goto NEXT();
  }
-

  ########################################

--- 1160,1165 ----
Index: packfile.c
===================================================================
RCS file: /cvs/public/parrot/packfile.c,v
retrieving revision 1.16
diff -c -r1.16 packfile.c
*** packfile.c 6 Dec 2001 21:22:13 -0000 1.16
--- packfile.c 30 Dec 2001 08:38:21 -0000
***************
*** 1484,1493 ****

      self->type   = PFC_STRING;
      if (encoding == 0) {
!         self->string = string_make(interpreter, cursor, size, NULL, flags,
NULL); /* fixme */
      }
      else if (encoding == 3) {
!         self->string = string_make(interpreter, cursor, size,
encoding_lookup("utf32"), flags, chartype_lookup("unicode")); /* fixme */
      }
      else {
        return 0;
--- 1484,1496 ----

      self->type   = PFC_STRING;
      if (encoding == 0) {
!         self->string = string_make(interpreter, cursor, size, NULL, flags,
!                                    NULL); /* fixme */
      }
      else if (encoding == 3) {
!         self->string = string_make(interpreter, cursor, size,
!                                    encoding_lookup("utf32"), flags,
!                                    chartype_lookup("unicode")); /* fixme
*/
      }
      else {
        return 0;
Index: pbc2c.pl
===================================================================
RCS file: /cvs/public/parrot/pbc2c.pl,v
retrieving revision 1.8
diff -c -r1.8 pbc2c.pl
*** pbc2c.pl 27 Dec 2001 21:18:03 -0000 1.8
--- pbc2c.pl 30 Dec 2001 08:38:21 -0000
***************
*** 123,129 ****
          $data = '"' . $data . '"' unless $data =~ m/^"/;

          print <<END_C;
!     c = PackFile_Constant_new_string(interpreter, string_make(interpreter,
$data, $size, $encoding, $flags, $type));
  END_C
        } else {
          die;
--- 123,130 ----
          $data = '"' . $data . '"' unless $data =~ m/^"/;

          print <<END_C;
!     c = PackFile_Constant_new_string(interpreter, string_make(interpreter,
!  $data, $size, $encoding, $flags, $type));
  END_C
        } else {
          die;
Index: string.c
===================================================================
RCS file: /cvs/public/parrot/string.c,v
retrieving revision 1.27
diff -c -r1.27 string.c
*** string.c 29 Dec 2001 22:12:37 -0000 1.27
--- string.c 30 Dec 2001 08:38:22 -0000
***************
*** 1,7 ****
  /* string.c
   *  Copyright: (When this is determined...it will go here)
   *  CVS Info
!  *     $Id: string.c,v 1.27 2001/12/29 22:12:37 dan Exp $
   *  Overview:
   *     This is the api definitions for the string subsystem
   *  Data Structure and Algorithms:
--- 1,7 ----
  /* string.c
   *  Copyright: (When this is determined...it will go here)
   *  CVS Info
!  *     $Id: string.c,v 1.26 2001/12/28 18:20:12 ajgough Exp $
   *  Overview:
   *     This is the api definitions for the string subsystem
   *  Data Structure and Algorithms:
***************
*** 31,39 ****
   * and compute its string length
   */
  STRING *
! string_make(struct Parrot_Interp *interpreter, void *buffer, INTVAL
buflen,
!             const ENCODING *encoding, INTVAL flags, const CHARTYPE *type)
{
!     STRING *s = new_string_header(interpreter);

      if (!type) {
        type = string_native_type;
--- 31,40 ----
   * and compute its string length
   */
  STRING *
! string_make(struct Parrot_Interp *interpreter, const void *buffer,
!             INTVAL buflen, const ENCODING *encoding, INTVAL flags,
!             const CHARTYPE *type) {
!     STRING *s;

      if (!type) {
        type = string_native_type;
***************
*** 43,69 ****
        encoding = encoding_lookup(type->default_encoding);
      }

!     s->bufstart = mem_sys_allocate(buflen);
!     mem_sys_memcopy(s->bufstart, buffer, buflen);
      s->encoding = encoding;
-     s->buflen = s->bufused = buflen;
      s->flags = flags;
-     string_compute_strlen(s);
      s->type = type;

!     return s;
! }
!
! /*=for api string string_grow
!  * reallocate memory for the string if it is too small
!  */
! void
! string_grow(STRING* s, INTVAL newsize) {
!     INTVAL newsize_in_bytes = string_max_bytes(s, newsize);
!     if (s->buflen < newsize_in_bytes) {
!         s->bufstart = mem_sys_realloc(s->bufstart, newsize_in_bytes);
      }
!     s->buflen = newsize_in_bytes;
  }

  /*=for api string string_destroy
--- 44,68 ----
        encoding = encoding_lookup(type->default_encoding);
      }

!     s = mem_sys_allocate(sizeof(STRING)+buflen);
      s->encoding = encoding;
      s->flags = flags;
      s->type = type;
+     s->buflen = buflen;

!     if (buffer) {
!         mem_sys_memcopy(s->bufstart, buffer, buflen);
!         s->bufused = buflen;
!         string_compute_strlen(s);
!     }
!     else {
!         s->strlen = s->bufused = 0;
      }
!
!     /* Make it null terminate. This will simplify making a native string
*/
!     s->bufstart[s->bufused]='\0';
!
!     return s;
  }

  /*=for api string string_destroy
***************
*** 80,86 ****
   * return the length of the string
   */
  INTVAL
! string_length(STRING* s) {
      return s->strlen;
  }

--- 79,85 ----
   * return the length of the string
   */
  INTVAL
! string_length(const STRING* s) {
      return s->strlen;
  }

***************
*** 91,97 ****
   * functions are fleshed out, this function can DTRT.
   */
  static INTVAL
! string_index(STRING* s, INTVAL index) {
      return s->encoding->decode(s->encoding->skip_forward(s->bufstart,
index));
  }

--- 90,96 ----
   * functions are fleshed out, this function can DTRT.
   */
  static INTVAL
! string_index(const STRING* s, INTVAL index) {
      return s->encoding->decode(s->encoding->skip_forward(s->bufstart,
index));
  }

***************
*** 99,105 ****
   * return the length of the string
   */
  INTVAL
! string_ord(STRING* s, INTVAL index) {
      if((s == NULL) || (string_length(s) == 0)) {
          INTERNAL_EXCEPTION(ORD_OUT_OF_STRING,
                             "Cannot get character of empty string");
--- 98,104 ----
   * return the length of the string
   */
  INTVAL
! string_ord(const STRING* s, INTVAL index) {
      if((s == NULL) || (string_length(s) == 0)) {
          INTERNAL_EXCEPTION(ORD_OUT_OF_STRING,
                             "Cannot get character of empty string");
***************
*** 129,136 ****
   * create a copy of the argument passed in
   */
  STRING*
! string_copy(struct Parrot_Interp *interpreter, STRING *s) {
!     return string_make(interpreter, s->bufstart, s->bufused, s->encoding,
                         s->flags, s->type);
  }

--- 128,135 ----
   * create a copy of the argument passed in
   */
  STRING*
! string_copy(struct Parrot_Interp *interpreter, const STRING *s) {
!     return string_make(interpreter, s->bufstart, s->bufused, s->encoding,
                         s->flags, s->type);
  }

***************
*** 138,199 ****
   * create a transcoded copy of the argument passed in
   */
  STRING*
! string_transcode(struct Parrot_Interp *interpreter, STRING *src,
!                  const ENCODING *encoding, const CHARTYPE *type,
!                  STRING *dest) {
!     if (!dest) {
!         dest = string_make(interpreter, NULL, 0, encoding, 0, type);
!     }
!     else {
!         dest->encoding = encoding;
!         dest->type = type;
!     }

!     string_grow(dest, src->strlen);
!
!     if (src->encoding == dest->encoding && src->type == dest->type) {
!         mem_sys_memcopy(dest->bufstart, src->bufstart, src->bufused);
!
!         dest->bufused = src->bufused;
!     }
!     else {
!         CHARTYPE_TRANSCODER transcoder1 = NULL;
!         CHARTYPE_TRANSCODER transcoder2 = NULL;
!         char *srcstart;
!         char *srcend;
!         char *deststart;
!         char *destend;
!
!         if (src->type != dest->type) {
!             transcoder1 = chartype_lookup_transcoder(src->type,
dest->type);
!             if (!transcoder1) {
!                 transcoder1 = chartype_lookup_transcoder(src->type,
!                                   string_unicode_type);
!                 transcoder2 =
chartype_lookup_transcoder(string_unicode_type,
!                                   dest->type);
!             }
!         }
!
!         srcstart = src->bufstart;
!         srcend = srcstart + src->bufused;
!         deststart = dest->bufstart;
!         destend = deststart + dest->buflen;
!
!         while (srcstart < srcend) {
!             INTVAL c = src->encoding->decode(srcstart);
!
!             if (transcoder1) c = transcoder1(c);
!             if (transcoder2) c = transcoder2(c);
!
!             deststart = dest->encoding->encode(deststart, c);
!
!             srcstart = src->encoding->skip_forward(srcstart, 1);
          }

!         dest->bufused = destend - deststart;
      }

      dest->strlen = src->strlen;

      return dest;
  }
--- 137,194 ----
   * create a transcoded copy of the argument passed in
   */
  STRING*
! string_transcode(struct Parrot_Interp *interpreter,
!                  const STRING *src, const ENCODING *encoding,
!                  const CHARTYPE *type, STRING **dest_ptr) {

!     STRING *dest;
!     CHARTYPE_TRANSCODER transcoder1 = NULL;
!     CHARTYPE_TRANSCODER transcoder2 = NULL;
!     void *srcstart;
!     void *srcend;
!     void *deststart;
!     void *destend;
!
!     if (src->encoding == encoding && src->type == type) {
!         return string_copy(interpreter, src);
!     }
!
!     dest = string_make(interpreter, NULL,
src->strlen*src->encoding->max_bytes,
!                        encoding, 0, type);
!
!     if (src->type != dest->type) {
!         transcoder1 = chartype_lookup_transcoder(src->type, dest->type);
!         if (!transcoder1) {
!             transcoder1 = chartype_lookup_transcoder(src->type,
!                                                      string_unicode_type);
!             transcoder2 = chartype_lookup_transcoder(string_unicode_type,
!                                                      dest->type);
          }
+     }

!     srcstart = (void*)src->bufstart;
!     srcend = srcstart + src->bufused;
!     deststart = dest->bufstart;
!     destend = deststart + dest->buflen;
!
!     while (srcstart < srcend) {
!         INTVAL c = src->encoding->decode(srcstart);
!
!         if (transcoder1) c = transcoder1(c);
!         if (transcoder2) c = transcoder2(c);
!
!         deststart = dest->encoding->encode(deststart, c);
!
!         srcstart = src->encoding->skip_forward(srcstart, 1);
      }

+     dest->bufused = destend - deststart;
      dest->strlen = src->strlen;
+     dest->bufstart[dest->bufused]='\0';
+
+     if (dest_ptr) {
+         *dest_ptr = dest;
+     }

      return dest;
  }
***************
*** 209,249 ****
      return s->strlen;
  }

- /*=for api string string_max_bytes
-  * get the maximum number of bytes needed by iv characters
-  */
- INTVAL
- string_max_bytes(STRING* s, INTVAL iv) {
-     return iv * s->encoding->max_bytes;
- }
-
  /*=for api string string_concat
   * concatenate two strings
   */
  STRING*
! string_concat(struct Parrot_Interp *interpreter, STRING* a, STRING* b,
!               INTVAL flags) {
!     if(a != NULL) {
!         if (b == NULL || b->strlen == 0) {
!             return a;
          }
!         if (a->type != b->type || a->encoding != b->encoding) {
!             b = string_transcode(interpreter, b, a->encoding, a->type,
NULL);
          }
-         string_grow(a, a->strlen + b->strlen);
-         mem_sys_memcopy((void*)((ptrcast_t)a->bufstart + a->bufused),
-                           b->bufstart, b->bufused);
-         a->strlen = a->strlen + b->strlen;
-         a->bufused = a->bufused + b->bufused;
      }
      else {
!         if (b == NULL) {
!             return string_make(interpreter, "", 0, 0, 0, 0);
          }
-         return string_make(interpreter,
-                          b->bufstart,b->buflen,b->encoding,flags,b->type);
      }
!     return a;
  }

  /*=for api string string_repeat
--- 204,250 ----
      return s->strlen;
  }

  /*=for api string string_concat
   * concatenate two strings
   */
  STRING*
! string_concat(struct Parrot_Interp *interpreter, const STRING* a,
!               const STRING* b, INTVAL flags) {
!     STRING *result;
!
!     if (a != NULL && a->strlen != 0) {
!         if (b != NULL && b->strlen != 0) {
!             result = string_make(interpreter, NULL, a->bufused +
!                                  b->strlen*a->encoding->max_bytes,
!                                  a->encoding, 0, a->type);
!             mem_sys_memcopy(result->bufstart,a->bufstart,a->bufused);
!             if (a->type != b->type || a->encoding != b->encoding) {
!                 b = string_transcode(interpreter, b, a->encoding, a->type,
NULL);
!             }
!             mem_sys_memcopy((void*)((ptrcast_t)result->bufstart +
a->bufused),
!                             b->bufstart, b->bufused);
!             result->strlen = a->strlen + b->strlen;
!             result->bufused = a->bufused + b->bufused;
!             result->bufstart[result->bufused]='\0';
          }
!         else {
!             return string_copy(interpreter, a);
          }
      }
      else {
!         if (a != NULL) {
!             return string_transcode(interpreter, b, a->encoding, a->type,
NULL);
!         }
!         else {
!             if (b != NULL) {
!                 return string_copy(interpreter, b);
!             }
!             else {
!                 return string_make(interpreter, "", 0, NULL, 0, NULL);
!             }
          }
      }
!     return result;
  }

  /*=for api string string_repeat
***************
*** 251,258 ****
   * Allocates I<d> if needed, also returns d.
  */
  STRING*
! string_repeat(struct Parrot_Interp *interpreter, STRING* s, INTVAL num,
!               STRING** d) {
      STRING* dest;
      INTVAL i;

--- 252,258 ----
   * Allocates I<d> if needed, also returns d.
  */
  STRING*
! string_repeat(struct Parrot_Interp *interpreter, const STRING* s, INTVAL
num, STRING** d) {
      STRING* dest;
      INTVAL i;

***************
*** 260,293 ****
          INTERNAL_EXCEPTION(NEG_REPEAT, "Cannot repeat with negative arg");
      }

!     if (!d || !*d) {
!         dest = string_make(interpreter,
!                            NULL, 0, s->encoding,
!                            0, s->type);
!     }
!     else {
!         dest = *d;
!     }
!     string_grow(dest, s->strlen * num);
      if (num == 0) {
-         dest->strlen = 0;
          return dest;
      }

!     /* copy s into dest */
!     mem_sys_memcopy(dest->bufstart, s->bufstart, s->bufused);
!
!     /* copy from start of dest to later part of dest n times */
!     for (i = 1; i< num; i++) {
          mem_sys_memcopy((void*)((ptrcast_t)dest->bufstart+s->bufused * i),
!                         dest->bufstart, s->bufused);
      }

-     dest->type = s->type;
-     dest->encoding = s->encoding;
-     dest->language = s->language;
      dest->bufused = s->bufused * num;
!     string_compute_strlen(dest);
      return dest;
  }

--- 260,283 ----
          INTERNAL_EXCEPTION(NEG_REPEAT, "Cannot repeat with negative arg");
      }

!     dest = string_make(interpreter, NULL, s->bufused*num, s->encoding, 0,
!                        s->type);
      if (num == 0) {
          return dest;
      }

!     /* copy s into dest num times */
!     for (i = 0; i< num; i++) {
          mem_sys_memcopy((void*)((ptrcast_t)dest->bufstart+s->bufused * i),
!                         s->bufstart, s->bufused);
      }

      dest->bufused = s->bufused * num;
!     dest->strlen = s->strlen *num;
!
!     if (d != NULL) {
!         *d = dest;
!     }
      return dest;
  }

***************
*** 296,306 ****
   * Allocate memory for d if necessary.
   */
  STRING*
! string_substr(struct Parrot_Interp *interpreter, STRING* src, INTVAL
offset,
!               INTVAL length, STRING** d) {
      STRING *dest;
!     char *substart;
!     char *subend;
      if (offset < 0) {
          offset = src->strlen + offset;
      }
--- 286,295 ----
   * Allocate memory for d if necessary.
   */
  STRING*
! string_substr(struct Parrot_Interp *interpreter, const STRING* src, INTVAL
offset, INTVAL length, STRING** d) {
      STRING *dest;
!     void *substart;
!     void *subend;
      if (offset < 0) {
          offset = src->strlen + offset;
      }
***************
*** 314,331 ****
      if (length > (src->strlen - offset) ) {
          length = src->strlen - offset;
      }
!     if (!d || !*d) {
!         dest = string_make(interpreter, NULL, 0, src->encoding, 0,
src->type);
!     }
!     else {
!         dest = *d;
!     }
      substart = src->encoding->skip_forward(src->bufstart, offset);
      subend = src->encoding->skip_forward(substart, length);
-     string_grow(dest, length);
      mem_sys_memcopy(dest->bufstart, substart, subend - substart);
      dest->bufused = subend - substart;
      dest->strlen = length;
      return dest;
  }

--- 303,320 ----
      if (length > (src->strlen - offset) ) {
          length = src->strlen - offset;
      }
!     dest = string_make(interpreter, NULL, length*src->encoding->max_bytes,
!                        src->encoding, 0, src->type);
      substart = src->encoding->skip_forward(src->bufstart, offset);
      subend = src->encoding->skip_forward(substart, length);
      mem_sys_memcopy(dest->bufstart, substart, subend - substart);
      dest->bufused = subend - substart;
      dest->strlen = length;
+     dest->bufstart[dest->bufused]='\0';
+
+     if (d != NULL) {
+         *d = dest;
+     }
      return dest;
  }

***************
*** 334,341 ****
   */
  STRING*
  string_chopn(STRING* s, INTVAL n) {
!     char *bufstart = s->bufstart;
!     char *bufend = bufstart + s->bufused;
      if (n > s->strlen) {
          n = s->strlen;
      }
--- 323,330 ----
   */
  STRING*
  string_chopn(STRING* s, INTVAL n) {
!     void *bufstart = s->bufstart;
!     void *bufend = bufstart + s->bufused;
      if (n > s->strlen) {
          n = s->strlen;
      }
***************
*** 345,350 ****
--- 334,340 ----
      bufend = s->encoding->skip_backward(bufend, n);
      s->bufused = bufend - bufstart;
      s->strlen = s->strlen - n;
+     s->bufstart[s->bufused] = '\0';
      return s;
  }

***************
*** 352,374 ****
   * compare two strings.
   */
  INTVAL
! string_compare(struct Parrot_Interp *interpreter, STRING* s1, STRING* s2)
{
!     char *s1start;
!     char *s1end;
!     char *s2start;
!     char *s2end;
      INTVAL cmp = 0;

      if (s1->type != s2->type || s1->encoding != s2->encoding) {
!         s1 =
!             string_transcode(interpreter, s1, NULL, string_unicode_type,
NULL);
!         s2 =
!             string_transcode(interpreter, s2, NULL, string_unicode_type,
NULL);
      }

!     s1start = s1->bufstart;
      s1end = s1start + s1->bufused;
!     s2start = s2->bufstart;
      s2end = s2start + s2->bufused;

      while (cmp == 0 && s1start < s1end && s2start < s2end) {
--- 342,365 ----
   * compare two strings.
   */
  INTVAL
! string_compare(struct Parrot_Interp *interpreter, const STRING* s1,
!                const STRING* s2) {
!     void *s1start;
!     void *s1end;
!     void *s2start;
!     void *s2end;
      INTVAL cmp = 0;

      if (s1->type != s2->type || s1->encoding != s2->encoding) {
!         s1 = string_transcode(interpreter, s1, NULL, string_unicode_type,
!                               NULL);
!         s2 = string_transcode(interpreter, s2, NULL, string_unicode_type,
!                               NULL);
      }

!     s1start = (void*)s1->bufstart;
      s1end = s1start + s1->bufused;
!     s2start = (void*)s2->bufstart;
      s2end = s2start + s2->bufused;

      while (cmp == 0 && s1start < s1end && s2start < s2end) {
***************
*** 388,394 ****
  }

  /* A string is "true" if it is equal to anything but "" and "0" */
! BOOLVAL string_bool (struct Parrot_Interp *interpreter, STRING* s) {
      INTVAL len;
      if (s == NULL) {
          return 0;
--- 379,385 ----
  }

  /* A string is "true" if it is equal to anything but "" and "0" */
! BOOLVAL string_bool (const STRING* s) {
      INTVAL len;
      if (s == NULL) {
          return 0;
***************
*** 423,434 ****
    rounding towards zero.
  */

! INTVAL string_to_int (struct Parrot_Interp *interpreter, STRING *s) {
      INTVAL i = 0;

      if (s) {
!         char *start = s->bufstart;
!         char *end = start + s->bufused;
          int sign = 1;
          BOOLVAL in_number = 0;

--- 414,425 ----
    rounding towards zero.
  */

! INTVAL string_to_int (const STRING *s) {
      INTVAL i = 0;

      if (s) {
!         void *start = (void*)s->bufstart;
!         void *end = start + s->bufused;
          int sign = 1;
          BOOLVAL in_number = 0;

***************
*** 461,472 ****
      return i;
  }

! FLOATVAL string_to_num (struct Parrot_Interp *interpreter, STRING *s) {
      FLOATVAL f = 0.0;

      if (s) {
!         char *start = s->bufstart;
!         char *end = start + s->bufused;
          int sign = 1;
          BOOLVAL seen_dot = 0;
          BOOLVAL seen_e = 0;
--- 452,463 ----
      return i;
  }

! FLOATVAL string_to_num (const STRING *s) {
      FLOATVAL f = 0.0;

      if (s) {
!         void *start = (void*)s->bufstart;
!         void *end = start + s->bufused;
          int sign = 1;
          BOOLVAL seen_dot = 0;
          BOOLVAL seen_e = 0;
Index: classes/perlstring.pmc
===================================================================
RCS file: /cvs/public/parrot/classes/perlstring.pmc,v
retrieving revision 1.10
diff -c -r1.10 perlstring.pmc
*** classes/perlstring.pmc 28 Dec 2001 18:20:12 -0000 1.10
--- classes/perlstring.pmc 30 Dec 2001 08:38:23 -0000
***************
*** 47,53 ****

      INTVAL get_integer () {
   STRING* s = (STRING*) SELF->cache.struct_val;
!  return string_to_int(interpreter, s);
      }

      INTVAL get_integer_index (INTVAL index) {
--- 47,53 ----

      INTVAL get_integer () {
   STRING* s = (STRING*) SELF->cache.struct_val;
!  return string_to_int(s);
      }

      INTVAL get_integer_index (INTVAL index) {
***************
*** 55,61 ****

      FLOATVAL get_number () {
   STRING* s = (STRING*) SELF->cache.struct_val;
!  return string_to_num(interpreter, s);
      }

      FLOATVAL get_number_index (INTVAL index) {
--- 55,61 ----

      FLOATVAL get_number () {
   STRING* s = (STRING*) SELF->cache.struct_val;
!  return string_to_num(s);
      }

      FLOATVAL get_number_index (INTVAL index) {
***************
*** 69,75 ****
      }

      BOOLVAL get_bool () {
!  return string_bool(interpreter, SELF->cache.struct_val);
      }

      void* get_value () {
--- 69,75 ----
      }

      BOOLVAL get_bool () {
!  return string_bool(SELF->cache.struct_val);
      }

      void* get_value () {
***************
*** 455,461 ****
   dest->vtable = &Parrot_base_vtables[enum_class_PerlString];
   dest->cache.struct_val =
             string_repeat(INTERP, SELF->cache.struct_val,
!                   string_to_int(interpreter, value), NULL
                    );
      }

--- 455,461 ----
   dest->vtable = &Parrot_base_vtables[enum_class_PerlString];
   dest->cache.struct_val =
             string_repeat(INTERP, SELF->cache.struct_val,
!                   string_to_int(value), NULL
                    );
      }

***************
*** 463,469 ****
   dest->vtable = &Parrot_base_vtables[enum_class_PerlString];
   dest->cache.struct_val =
             string_repeat(INTERP, SELF->cache.struct_val,
!                   string_to_int(interpreter, value), NULL
                    );
      }

--- 463,469 ----
   dest->vtable = &Parrot_base_vtables[enum_class_PerlString];
   dest->cache.struct_val =
             string_repeat(INTERP, SELF->cache.struct_val,
!                   string_to_int(value), NULL
                    );
      }

***************
*** 471,477 ****
   dest->vtable = &Parrot_base_vtables[enum_class_PerlString];
   dest->cache.struct_val =
             string_repeat(INTERP, SELF->cache.struct_val,
!                   string_to_int(interpreter, value), NULL
                    );
      }

--- 471,477 ----
   dest->vtable = &Parrot_base_vtables[enum_class_PerlString];
   dest->cache.struct_val =
             string_repeat(INTERP, SELF->cache.struct_val,
!                   string_to_int(value), NULL
                    );
      }

Index: docs/strings.pod
===================================================================
RCS file: /cvs/public/parrot/docs/strings.pod,v
retrieving revision 1.7
diff -c -r1.7 strings.pod
*** docs/strings.pod 28 Dec 2001 18:20:12 -0000 1.7
--- docs/strings.pod 30 Dec 2001 08:38:24 -0000
***************
*** 77,83 ****

  To repeat a string, (ie, turn 'xyz' into 'xyzxyzxyz') use:

!     STRING* string_repeat(struct Parrot_Interp *, STRING* s, INTVAL n,
STRING** d)

  Which will repeat string I<s> n times and store the result into I<d>,
which it
  also returns.  If I<*d> or I<**d> is NULL, a new string will be allocated
--- 77,83 ----

  To repeat a string, (ie, turn 'xyz' into 'xyzxyzxyz') use:

!     STRING* string_repeat(STRING* s, INTVAL n, STRING** d)

  Which will repeat string I<s> n times and store the result into I<d>,
which it
  also returns.  If I<*d> or I<**d> is NULL, a new string will be allocated
***************
*** 91,97 ****

  To retrieve a substring of the string, call

!     STRING* string_substr(struct Parrot_Interp *, STRING* src, INTVAL
offset, INTVAL length, STRING** dest)

  The result will be placed in C<dest>.
  (Passing in C<dest> avoids allocating a new string at runtime. If
--- 91,97 ----

  To retrieve a substring of the string, call

!     STRING* string_substr(STRING* src, INTVAL offset, INTVAL length,
STRING** dest)

  The result will be placed in C<dest>.
  (Passing in C<dest> avoids allocating a new string at runtime. If
***************
*** 113,119 ****

  To compare two strings, use:

!     INTVAL string_compare(struct Parrot_Interp *, STRING* s1, STRING* s2)

  The value returned will be less than, equal to, or greater than zero
  depending on whether C<s1> is less than, equal to, or greater than C<s2>.
--- 113,119 ----

  To compare two strings, use:

!     INTVAL string_compare(STRING* s1, STRING* s2)

  The value returned will be less than, equal to, or greater than zero
  depending on whether C<s1> is less than, equal to, or greater than C<s2>.
***************
*** 124,130 ****

  To test a string for truth, use:

!     BOOLVAL string_bool(struct Parrot_Interp *, STRING* s);

  A string is false if it

--- 124,130 ----

  To test a string for truth, use:

!     BOOLVAL string_bool(STRING* s);

  A string is false if it

***************
*** 152,158 ****
  structure in F<string.h>:

      struct parrot_string {
-       void *bufstart;
        INTVAL buflen;
        INTVAL bufused;
        INTVAL flags;
--- 152,157 ----
***************
*** 160,176 ****
        INTVAL encoding;
        INTVAL type;
        INTVAL unused;
      };

  Let's look at each element of this structure in turn.

- =head2 C<bufstart>
-
- This pointer points to the buffer which holds the string, encoded in
- whatever is the string's specified encoding. Because of this, you should
- not make any assumptions about what's in the buffer, and hence you
- shouldn't try and access it directly.
-
  =head2 C<buflen>

  This is used for memory allocation; it tells you the currently allocated
--- 159,169 ----
        INTVAL encoding;
        INTVAL type;
        INTVAL unused;
+       char bufstart[1];
      };

  Let's look at each element of this structure in turn.

  =head2 C<buflen>

  This is used for memory allocation; it tells you the currently allocated
***************
*** 236,241 ****
--- 229,241 ----
  This field is, as its name suggests, unused; however, it can be used to
  hold a pointer to the correct vtable for foreign strings.

+ =head2 C<bufstart>
+
+ This pointer points to the buffer which holds the string, encoded in
+ whatever is the string's specified encoding. Because of this, you should
+ not make any assumptions about what's in the buffer, and hence you
+ shouldn't try and access it directly.
+
  =head1 String Vtable Functions

  The L</String Manipulation Functions> above are implemented in terms of
***************
*** 326,357 ****
  not helping construct the Parrot core itself, you probably want to look
  away now.

- The first two functions to note are
-
      INTVAL string_compute_strlen(STRING* s)

! and
!
!     INTVAL string_max_bytes(STRING *s, INTVAL iv)
!
! The first updates the contents of C<< s->strlen >> by contemplating the
! buffer C<bufstart> and working out how many characters it contains. The
! second is given a number of characters which we assume are going to be
! added into the string at some point; it returns the maximum number of
! bytes that need to be allocated to admit that number of characters. For
! fixed-width encodings, this is trivial - the "native" encoding, for
! instance, encodes one byte per character, so C<string_native_max_bytes>
! simply returns the C<INTVAL> it is passed; C<string_utf8_max_bytes>, on
the
! other hand, returns three times the value that it is passed because a
! UTF8 character may occupy up to three bytes.
!
! To grow a string to a specified size, use
!
!     void string_grow(STRING *s, INTVAL newsize)
!
! The size is given in characters; C<string_max_bytes> is called to turn
! this into a size in bytes, and then the buffer is grown to accomodate
! (at least) that many bytes.

  =head1 Transcoding

--- 326,335 ----
  not helping construct the Parrot core itself, you probably want to look
  away now.

      INTVAL string_compute_strlen(STRING* s)

! Updates the contents of C<< s->strlen >> by contemplating the
! buffer C<bufstart> and working out how many characters it contains.

  =head1 Transcoding

Index: encodings/singlebyte.c
===================================================================
RCS file: /cvs/public/parrot/encodings/singlebyte.c,v
retrieving revision 1.5
diff -c -r1.5 singlebyte.c
*** encodings/singlebyte.c 6 Dec 2001 00:11:24 -0000 1.5
--- encodings/singlebyte.c 30 Dec 2001 08:38:24 -0000
***************
*** 41,55 ****
  }

  static void *
! singlebyte_skip_forward (void *ptr, INTVAL n) {
!     byte_t *bptr = ptr;

      return bptr + n;
  }

  static void *
! singlebyte_skip_backward (void *ptr, INTVAL n) {
!     byte_t *bptr = ptr;

      return bptr - n;
  }
--- 41,55 ----
  }

  static void *
! singlebyte_skip_forward (const void *ptr, INTVAL n) {
!     byte_t *bptr = (byte_t*)ptr;

      return bptr + n;
  }

  static void *
! singlebyte_skip_backward (const void *ptr, INTVAL n) {
!     byte_t *bptr = (byte_t*)ptr;

      return bptr - n;
  }
Index: encodings/utf16.c
===================================================================
RCS file: /cvs/public/parrot/encodings/utf16.c,v
retrieving revision 1.4
diff -c -r1.4 utf16.c
*** encodings/utf16.c 6 Dec 2001 00:11:24 -0000 1.4
--- encodings/utf16.c 30 Dec 2001 08:38:24 -0000
***************
*** 77,84 ****
  }

  static void *
! utf16_skip_forward (void *ptr, INTVAL n) {
!     utf16_t *u16ptr = ptr;

      while (n-- > 0) {
        if (UNICODE_IS_HIGH_SURROGATE(*u16ptr)) {
--- 77,84 ----
  }

  static void *
! utf16_skip_forward (const void *ptr, INTVAL n) {
!     utf16_t *u16ptr = (utf16_t*)ptr;

      while (n-- > 0) {
        if (UNICODE_IS_HIGH_SURROGATE(*u16ptr)) {
***************
*** 100,107 ****
  }

  static void *
! utf16_skip_backward (void *ptr, INTVAL n) {
!     utf16_t *u16ptr = ptr;

      while (n--> 0) {
          u16ptr--;
--- 100,107 ----
  }

  static void *
! utf16_skip_backward (const void *ptr, INTVAL n) {
!     utf16_t *u16ptr = (utf16_t*)ptr;

      while (n--> 0) {
          u16ptr--;
Index: encodings/utf32.c
===================================================================
RCS file: /cvs/public/parrot/encodings/utf32.c,v
retrieving revision 1.1
diff -c -r1.1 utf32.c
*** encodings/utf32.c 31 Oct 2001 22:51:31 -0000 1.1
--- encodings/utf32.c 30 Dec 2001 08:38:24 -0000
***************
*** 44,58 ****
  }

  static void *
! utf32_skip_forward (void *ptr, INTVAL n) {
!     utf32_t *u32ptr = ptr;

      return u32ptr + n;
  }

  static void *
! utf32_skip_backward (void *ptr, INTVAL n) {
!     utf32_t *u32ptr = ptr;

      return u32ptr - n;
  }
--- 44,58 ----
  }

  static void *
! utf32_skip_forward (const void *ptr, INTVAL n) {
!     utf32_t *u32ptr = (utf32_t*)ptr;

      return u32ptr + n;
  }

  static void *
! utf32_skip_backward (const void *ptr, INTVAL n) {
!     utf32_t *u32ptr = (utf32_t*)ptr;

      return u32ptr - n;
  }
Index: encodings/utf8.c
===================================================================
RCS file: /cvs/public/parrot/encodings/utf8.c,v
retrieving revision 1.4
diff -c -r1.4 utf8.c
*** encodings/utf8.c 6 Dec 2001 00:11:24 -0000 1.4
--- encodings/utf8.c 30 Dec 2001 08:38:25 -0000
***************
*** 97,104 ****
  }

  static void *
! utf8_skip_forward (void *ptr, INTVAL n) {
!     utf8_t *u8ptr = ptr;

      while (n-- > 0) {
          u8ptr += UTF8SKIP(u8ptr);
--- 97,104 ----
  }

  static void *
! utf8_skip_forward (const void *ptr, INTVAL n) {
!     utf8_t *u8ptr = (utf8_t*)ptr;

      while (n-- > 0) {
          u8ptr += UTF8SKIP(u8ptr);
***************
*** 108,115 ****
  }

  static void *
! utf8_skip_backward (void *ptr, INTVAL n) {
!     utf8_t *u8ptr = ptr;

      while (n-- > 0) {
          u8ptr--;
--- 108,115 ----
  }

  static void *
! utf8_skip_backward (const void *ptr, INTVAL n) {
!     utf8_t *u8ptr = (utf8_t*)ptr;

      while (n-- > 0) {
          u8ptr--;
Index: include/parrot/encoding.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/encoding.h,v
retrieving revision 1.4
diff -c -r1.4 encoding.h
*** include/parrot/encoding.h 6 Dec 2001 00:11:24 -0000 1.4
--- include/parrot/encoding.h 30 Dec 2001 08:38:25 -0000
***************
*** 19,26 ****
      INTVAL (*characters)(const void *ptr, INTVAL bytes);
      INTVAL (*decode)(const void *ptr);
      void *(*encode)(void *ptr, INTVAL c);
!     void *(*skip_forward)(void *ptr, INTVAL n);
!     void *(*skip_backward)(void *ptr, INTVAL n);
  } ENCODING;

  const ENCODING *
--- 19,26 ----
      INTVAL (*characters)(const void *ptr, INTVAL bytes);
      INTVAL (*decode)(const void *ptr);
      void *(*encode)(void *ptr, INTVAL c);
!     void *(*skip_forward)(const void *ptr, INTVAL n);
!     void *(*skip_backward)(const void *ptr, INTVAL n);
  } ENCODING;

  const ENCODING *
Index: include/parrot/string.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/string.h,v
retrieving revision 1.15
diff -c -r1.15 string.h
*** include/parrot/string.h 28 Dec 2001 18:20:12 -0000 1.15
--- include/parrot/string.h 30 Dec 2001 08:38:25 -0000
***************
*** 16,22 ****
  #include "parrot/parrot.h"

  typedef struct {
-     void *bufstart;
      INTVAL buflen;
      INTVAL flags;
      INTVAL bufused;
--- 16,21 ----
***************
*** 24,29 ****
--- 23,29 ----
      const ENCODING *encoding;
      const CHARTYPE *type;
      INTVAL language;
+     char bufstart[1];
  } STRING;


***************
*** 31,72 ****

  INTVAL
  string_compute_strlen(STRING*);
- INTVAL
- string_max_bytes(STRING*, INTVAL);
  STRING*
! string_concat(struct Parrot_Interp *, STRING*, STRING*, INTVAL);
  STRING*
! string_repeat(struct Parrot_Interp *, STRING* , INTVAL, STRING**);
  STRING*
  string_chopn(STRING*, INTVAL);
  STRING*
! string_substr(struct Parrot_Interp *interpreter, STRING*, INTVAL, INTVAL,
STRING**);
  INTVAL
! string_compare(struct Parrot_Interp *, STRING*, STRING*);
  BOOLVAL
! string_bool(struct Parrot_Interp *, STRING*);

  /* Declarations of other functions */
  INTVAL
! string_length(STRING*);
  INTVAL
! string_ord(STRING* s, INTVAL index);
  FLOATVAL
! string_to_num (struct Parrot_Interp *interpreter, STRING *s);
  INTVAL
! string_to_int (struct Parrot_Interp *interpreter, STRING *s);
! void
! string_grow(STRING* s, INTVAL newsize);
  void
  string_destroy(STRING* s);
  STRING*
! string_make(struct Parrot_Interp *interpreter, void *buffer, INTVAL
buflen, const ENCODING *encoding, INTVAL flags, const CHARTYPE *type);
  STRING*
! string_copy(struct Parrot_Interp *interpreter, STRING *i);
  STRING*
! string_transcode(struct Parrot_Interp *interpreter, STRING *src, const
ENCODING *encoding, const CHARTYPE *type, STRING *dest);
  void
  string_init(void);

  #endif

--- 31,76 ----

  INTVAL
  string_compute_strlen(STRING*);
  STRING*
! string_concat(struct Parrot_Interp *interpreter, const STRING*, const
STRING*,
!               INTVAL);
  STRING*
! string_repeat(struct Parrot_Interp *interpreter, const STRING* , INTVAL,
!               STRING**);
  STRING*
  string_chopn(STRING*, INTVAL);
  STRING*
! string_substr(struct Parrot_Interp *interpreter, const STRING*, INTVAL,
!               INTVAL, STRING**);
  INTVAL
! string_compare(struct Parrot_Interp *interpreter, const STRING*, const
STRING*);
  BOOLVAL
! string_bool(const STRING*);

  /* Declarations of other functions */
  INTVAL
! string_length(const STRING*);
  INTVAL
! string_ord(const STRING* s, INTVAL index);
  FLOATVAL
! string_to_num (const STRING *s);
  INTVAL
! string_to_int (const STRING *s);
  void
  string_destroy(STRING* s);
  STRING*
! string_make(struct Parrot_Interp *interpreter, const void *buffer,
!             INTVAL buflen, const ENCODING *encoding, INTVAL flags,
!             const CHARTYPE *type);
  STRING*
! string_copy(struct Parrot_Interp *interpreter, const STRING *i);
  STRING*
! string_transcode(struct Parrot_Interp *interpreter, const STRING *src,
!                  const ENCODING *encoding, const CHARTYPE *type, STRING
**d);
  void
  string_init(void);
+ static INTVAL
+ string_index(const STRING* s, INTVAL index);

  #endif

Index: t/op/string.t
===================================================================
RCS file: /cvs/public/parrot/t/op/string.t,v
retrieving revision 1.16
diff -c -r1.16 string.t
*** t/op/string.t 28 Dec 2001 18:20:13 -0000 1.16
--- t/op/string.t 30 Dec 2001 08:38:28 -0000
***************
*** 1,13 ****
  #! perl -w

! use Parrot::Test tests => 48;

! output_is( <<'CODE', <<OUTPUT, "set_s_sc" );
   set S4, "JAPH\n"
   print S4
   end
  CODE
  JAPH
  OUTPUT

  output_is( <<'CODE', '4', "length_i_s" );
--- 1,16 ----
  #! perl -w

! use Parrot::Test tests => 63;

! output_is( <<'CODE', <<OUTPUT, "set_s_s|sc" );
   set S4, "JAPH\n"
+  set S5, S4
   print S4
+  print S5
   end
  CODE
  JAPH
+ JAPH
  OUTPUT

  output_is( <<'CODE', '4', "length_i_s" );
***************
*** 18,38 ****
   end
  CODE

! output_is( <<'CODE', <<OUTPUT, "chopn_s_ic" );
   set S4, "JAPHxyzw"
   set S5, "japhXYZW"
!  set S3, "\n"
   chopn S4, 3
   chopn S4, 1
!  chopn S5, 4
   print S4
!  print S3
   print S5
   print S3
   end
  CODE
  JAPH
  japh
  OUTPUT

  output_is(<<'CODE', <<OUTPUT, "chopn, OOB values");
--- 21,46 ----
   end
  CODE

! output_is( <<'CODE', <<OUTPUT, "chopn_s_i|ic" );
   set S4, "JAPHxyzw"
   set S5, "japhXYZW"
!  set S3, S4
!  set S1  "\n"
!  set I1  4
   chopn S4, 3
   chopn S4, 1
!  chopn S5, I1
   print S4
!  print S1
   print S5
+  print S1
   print S3
+  print S1
   end
  CODE
  JAPH
  japh
+ JAPHxyzw
  OUTPUT

  output_is(<<'CODE', <<OUTPUT, "chopn, OOB values");
***************
*** 57,81 ****
  ** nothing **
  OUTPUT

! output_is( <<'CODE', 'JAPH', "substr_s_s_i_i" );
   set S4, "12345JAPH01"
   set I4, 5
   set I5, 4
   substr S5, S4, I4, I5
   print S5
   end
  CODE

  # negative offsets
  output_is(<<'CODE', <<'OUTPUT', "neg substr offset");
   set S0, "A string of length 21"
!  set I0, -9
!  set I1, 6
!  substr_s_s_i S1, S0, I0, I1
!  print S0
!  print "\n"
!  print S1
!  print "\n"
   end
  CODE
  A string of length 21
--- 65,106 ----
  ** nothing **
  OUTPUT

! output_is( <<'CODE', <<'OUTPUT', "substr_s_s|sc_i|ic_i|ic" );
   set S4, "12345JAPH01"
   set I4, 5
   set I5, 4
   substr S5, S4, I4, I5
   print S5
+  substr S5, S4, I4, 4
+  print S5
+  substr S5, S4, 5, I5
+  print S5
+  substr S5, S4, 5, 4
+  print S5
+  substr S5, "12345JAPH01", I4, I5
+  print S5
+  substr S5, "12345JAPH01", I4, 4
+  print S5
+  substr S5, "12345JAPH01", 5, I5
+  print S5
+  substr S5, "12345JAPH01", 5, 4
+  print S5
+  print "\n"
   end
  CODE
+ JAPHJAPHJAPHJAPHJAPHJAPHJAPHJAPH
+ OUTPUT

  # negative offsets
  output_is(<<'CODE', <<'OUTPUT', "neg substr offset");
   set S0, "A string of length 21"
!  set I0, -9
!  set I1, 6
!  substr S1, S0, I0, I1
!  print S0
!  print "\n"
!  print S1
!  print "\n"
   end
  CODE
  A string of length 21
***************
*** 83,110 ****
  OUTPUT

  # This asks for substring it shouldn't be allowed...
! output_is(<<'CODE', 'Cannot take substr outside string', "sub err:OOR");
   set S0, "A string of length 21"
!  set I0, -99
!  set I1, 6
!  substr_s_s_i S1, S0, I0, I1
!  print S0
!  print "\n"
!  print S1
!  print "\n"
   end
  CODE

  # This asks for substring much greater than length of original string
  output_is(<<'CODE', <<'OUTPUT', "len>strlen");
   set S0, "A string of length 21"
!  set I0, 12
!  set I1, 1000
!  substr_s_s_i S1, S0, I0, I1
!  print S0
!  print "\n"
!  print S1
!  print "\n"
   end
  CODE
  A string of length 21
--- 108,140 ----
  OUTPUT

  # This asks for substring it shouldn't be allowed...
! output_is(<<'CODE', 'Cannot take substr outside string', "substr OOB");
   set S0, "A string of length 21"
!  set I0, -99
!  set I1, 6
!  substr S1, S0, I0, I1
!  end
! CODE
!
! # This asks for substring it shouldn't be allowed...
! output_is(<<'CODE', 'Cannot take substr outside string', "substr OOB");
!  set S0, "A string of length 21"
!  set I0, 99
!  set I1, 6
!  substr S1, S0, I0, I1
   end
  CODE

  # This asks for substring much greater than length of original string
  output_is(<<'CODE', <<'OUTPUT', "len>strlen");
   set S0, "A string of length 21"
!  set I0, 12
!  set I1, 1000
!  substr S1, S0, I0, I1
!  print  S0
!  print "\n"
!  print S1
!  print "\n"
   end
  CODE
  A string of length 21
***************
*** 114,168 ****
  # The same, with a negative offset
  output_is(<<'CODE', <<'OUTPUT', "len>strlen, -ve os");
   set S0, "A string of length 21"
!  set I0, -9
!  set I1, 1000
!  substr_s_s_i S1, S0, I0, I1
!  print S0
!  print "\n"
!  print S1
!  print "\n"
   end
  CODE
  A string of length 21
  length 21
  OUTPUT

! output_is( <<'CODE', '<><', "2-param concat, null onto null" );
!     print "<>"
!     concat S0,S0
!     print "<"
!     end
  CODE

! output_is( <<'CODE', <<OUTPUT, '2-param concat, "foo1" onto null' );
!     concat S0,"foo1"
!     print S0
!     print "\n"
!     end
  CODE
  foo1
  OUTPUT

! output_is( <<'CODE', <<OUTPUT, '2-param concat, "foo2" onto null' );
!     set S1,"foo2"
!     concat S0,S1
!     print S0
!     print "\n"
      end
  CODE
! foo2
  OUTPUT

! output_is( <<'CODE', <<OUTPUT, "concat" );
!     set S1, "fish"
!     set S2, "bone"
!     concat S1, S2
!     print S1
!     set S2, "\n"
!     print S2
      end
  CODE
! fishbone
  OUTPUT


--- 144,230 ----
  # The same, with a negative offset
  output_is(<<'CODE', <<'OUTPUT', "len>strlen, -ve os");
   set S0, "A string of length 21"
!  set I0, -9
!  set I1, 1000
!  substr S1, S0, I0, I1
!  print S0
!  print "\n"
!  print S1
!  print "\n"
   end
  CODE
  A string of length 21
  length 21
  OUTPUT

! output_is( <<'CODE', '<><', "concat_s_s|sc, null onto null" );
!  print "<>"
!  concat S0, S0
!  concat S1, ""
!  print "<"
!  end
  CODE

! output_is( <<'CODE', <<OUTPUT, 'concat_s_s|sc, "foo1" onto null' );
!  concat S0, "foo1"
!  set S1, "foo2"
!  concat S2, S1
!  print S0
!  print "\n"
!  print S2
!  print "\n"
!  end
  CODE
  foo1
+ foo2
  OUTPUT

! output_is( <<'CODE', <<OUTPUT, "concat_s_s|sc" );
!  set S1, "fish"
!  set S2, "bone"
!  concat S1, S2
!  print S1
!  concat S1, "\n"
!  print S1
      end
  CODE
! fishbonefishbone
  OUTPUT

! output_is( <<'CODE', <<OUTPUT, "concat_s_s|sc_s|sc" );
!  set S1, "japh"
!  set S2, "JAPH"
!  concat S0, "japh", "JAPH"
!  print S0
!  print "\n"
!  concat S0, S1, "JAPH"
!  print S0
!  print "\n"
!  concat S0, "japh", S2
!  print S0
!  print "\n"
!  concat S0, S1, S2
!  print S0
!  print "\n"
      end
  CODE
! japhJAPH
! japhJAPH
! japhJAPH
! japhJAPH
! OUTPUT
!
! output_is( <<'CODE', <<OUTPUT, "concat - ensure copy is made" );
!  set S2, "JAPH"
!  concat S0, S2, ""
!  concat S1, "", S2
!  chopn S0, 1
!  chopn S1, 1
!  print S2
!  print "\n"
!  end
! CODE
! JAPH
  OUTPUT


***************
*** 201,207 ****
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "eq_s_sc_ic");
  @{[ compare_strings( 1, "eq", @strings ) ]}
      print "ok\\n"
      end
--- 263,269 ----
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "eq_sc_s_ic");
  @{[ compare_strings( 1, "eq", @strings ) ]}
      print "ok\\n"
      end
***************
*** 212,275 ****
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "eq_sc_s_ic");
!  set S0, "I am legion"

!  eq "I am legion", S0, GOOD1
!  print "not "
! GOOD1: print "ok 1\\n"

!  eq "I am legend", S0, BAD1
!  branch GOOD2
! BAD1: print "not "
! GOOD2: print "ok 2\\n"
!  end
  CODE
! ok 1
! ok 2
  OUTPUT

  output_is(<<CODE, <<OUTPUT, "ne_sc_s_ic");
!  set S0, "I am legion"

!  ne "I am legend", S0, GOOD1
!  print "not "
! GOOD1: print "ok 1\\n"

!  ne "I am legion", S0, BAD1
!  branch GOOD2
! BAD1: print "not "
! GOOD2: print "ok 2\\n"
!  end
  CODE
! ok 1
! ok 2
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "eq_sc_s");

   set S0, "Sparticus"
   bsr TEST1
   print "ok 1\\n"
   bsr TEST2
   print "ok 2\\n"
   end

  TEST1: eq "Sparticus", S0
   print "not "
   ret

! TEST2: ne "Spartisnt", S0
   print "not "
   ret

  CODE
  ok 1
  ok 2
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "ne_s_s_ic");
! @{[ compare_strings( 0, "ne", @strings ) ]}
      print "ok\\n"
      end
  ERROR:
--- 274,383 ----
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "eq_s_sc_ic");
! @{[ compare_strings( 2, "eq", @strings ) ]}
!     print "ok\\n"
!     end
! ERROR:
!     print "bad\\n"
!     end
! CODE
! ok
! OUTPUT

! output_is(<<CODE, <<OUTPUT, "eq_sc_sc_ic");
! @{[ compare_strings( 3, "eq", @strings ) ]}
!     print "ok\\n"
!     end
! ERROR:
!     print "bad\\n"
!     end
! CODE
! ok
! OUTPUT

! output_is(<<CODE, <<OUTPUT, "ne_s_s_ic");
! @{[ compare_strings( 0, "ne", @strings ) ]}
!     print "ok\\n"
!     end
! ERROR:
!     print "bad\\n"
!     end
  CODE
! ok
  OUTPUT

  output_is(<<CODE, <<OUTPUT, "ne_sc_s_ic");
! @{[ compare_strings( 1, "ne", @strings ) ]}
!     print "ok\\n"
!     end
! ERROR:
!     print "bad\\n"
!     end
! CODE
! ok
! OUTPUT

! output_is(<<CODE, <<OUTPUT, "ne_s_sc_ic");
! @{[ compare_strings( 2, "ne", @strings ) ]}
!     print "ok\\n"
!     end
! ERROR:
!     print "bad\\n"
!     end
! CODE
! ok
! OUTPUT

! output_is(<<CODE, <<OUTPUT, "ne_sc_sc_ic");
! @{[ compare_strings( 3, "ne", @strings ) ]}
!     print "ok\\n"
!     end
! ERROR:
!     print "bad\\n"
!     end
  CODE
! ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "eq_s|sc_s|sc");

   set S0, "Sparticus"
   bsr TEST1
   print "ok 1\\n"
   bsr TEST2
   print "ok 2\\n"
+  bsr TEST3
+  print "ok 3\\n"
+  bsr TEST4
+  print "ok 4\\n"
   end

  TEST1: eq "Sparticus", S0
   print "not "
   ret

! TEST2: eq S0, "Sparticus"
!  print "not "
!  ret
!
! TEST3: eq S0, S0
!  print "not "
!  ret
!
! TEST4: eq "Sparticus", "Sparticus"
   print "not "
   ret

  CODE
  ok 1
  ok 2
+ ok 3
+ ok 4
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "lt_s_s_ic");
! @{[ compare_strings( 0, "lt", @strings ) ]}
      print "ok\\n"
      end
  ERROR:
***************
*** 279,286 ****
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "ne_s_sc_ic");
! @{[ compare_strings( 1, "ne", @strings ) ]}
      print "ok\\n"
      end
  ERROR:
--- 387,394 ----
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "lt_sc_s_ic");
! @{[ compare_strings( 1, "lt", @strings ) ]}
      print "ok\\n"
      end
  ERROR:
***************
*** 290,297 ****
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "lt_s_s_ic");
! @{[ compare_strings( 0, "lt", @strings ) ]}
      print "ok\\n"
      end
  ERROR:
--- 398,405 ----
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "lt_s_sc_ic");
! @{[ compare_strings( 2, "lt", @strings ) ]}
      print "ok\\n"
      end
  ERROR:
***************
*** 301,308 ****
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "lt_s_sc_ic");
! @{[ compare_strings( 1, "lt", @strings ) ]}
      print "ok\\n"
      end
  ERROR:
--- 409,416 ----
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "lt_sc_sc_ic");
! @{[ compare_strings( 3, "lt", @strings ) ]}
      print "ok\\n"
      end
  ERROR:
***************
*** 323,329 ****
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "le_s_sc_ic");
  @{[ compare_strings( 1, "le", @strings ) ]}
      print "ok\\n"
      end
--- 431,437 ----
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "le_sc_s_ic");
  @{[ compare_strings( 1, "le", @strings ) ]}
      print "ok\\n"
      end
***************
*** 334,339 ****
--- 442,469 ----
  ok
  OUTPUT

+ output_is(<<CODE, <<OUTPUT, "le_s_sc_ic");
+ @{[ compare_strings( 2, "le", @strings ) ]}
+     print "ok\\n"
+     end
+ ERROR:
+     print "bad\\n"
+     end
+ CODE
+ ok
+ OUTPUT
+
+ output_is(<<CODE, <<OUTPUT, "le_sc_sc_ic");
+ @{[ compare_strings( 3, "le", @strings ) ]}
+     print "ok\\n"
+     end
+ ERROR:
+     print "bad\\n"
+     end
+ CODE
+ ok
+ OUTPUT
+
  output_is(<<CODE, <<OUTPUT, "gt_s_s_ic");
  @{[ compare_strings( 0, "gt", @strings ) ]}
      print "ok\\n"
***************
*** 345,351 ****
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "gt_s_sc_ic");
  @{[ compare_strings( 1, "gt", @strings ) ]}
      print "ok\\n"
      end
--- 475,481 ----
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "gt_sc_s_ic");
  @{[ compare_strings( 1, "gt", @strings ) ]}
      print "ok\\n"
      end
***************
*** 356,361 ****
--- 486,513 ----
  ok
  OUTPUT

+ output_is(<<CODE, <<OUTPUT, "gt_s_sc_ic");
+ @{[ compare_strings( 2, "gt", @strings ) ]}
+     print "ok\\n"
+     end
+ ERROR:
+     print "bad\\n"
+     end
+ CODE
+ ok
+ OUTPUT
+
+ output_is(<<CODE, <<OUTPUT, "gt_sc_sc_ic");
+ @{[ compare_strings( 3, "gt", @strings ) ]}
+     print "ok\\n"
+     end
+ ERROR:
+     print "bad\\n"
+     end
+ CODE
+ ok
+ OUTPUT
+
  output_is(<<CODE, <<OUTPUT, "ge_s_s_ic");
  @{[ compare_strings( 0, "ge", @strings ) ]}
      print "ok\\n"
***************
*** 367,373 ****
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "ge_s_sc_ic");
  @{[ compare_strings( 1, "ge", @strings ) ]}
      print "ok\\n"
      end
--- 519,525 ----
  ok
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "ge_sc_s_ic");
  @{[ compare_strings( 1, "ge", @strings ) ]}
      print "ok\\n"
      end
***************
*** 378,383 ****
--- 530,557 ----
  ok
  OUTPUT

+ output_is(<<CODE, <<OUTPUT, "ge_s_sc_ic");
+ @{[ compare_strings( 2, "ge", @strings ) ]}
+     print "ok\\n"
+     end
+ ERROR:
+     print "bad\\n"
+     end
+ CODE
+ ok
+ OUTPUT
+
+ output_is(<<CODE, <<OUTPUT, "ge_sc_sc_ic");
+ @{[ compare_strings( 3, "ge", @strings ) ]}
+     print "ok\\n"
+     end
+ ERROR:
+     print "bad\\n"
+     end
+ CODE
+ ok
+ OUTPUT
+
  output_is(<<'CODE', <<OUTPUT, "same constant twice bug");
         set     S0, ""
         set     S1, ""
***************
*** 421,426 ****
--- 595,606 ----
   end
  CODE

+ output_is(<<'CODE',ord('a'),'2-param ord, multi-character string');
+  ord I0,"abc"
+  print I0
+  end
+ CODE
+
  output_is(<<'CODE',ord('a'),'2-param ord, one-character string register');
   set S0,"a"
   ord I0,S0
***************
*** 493,498 ****
--- 673,685 ----
   end
  CODE

+ output_is(<<'CODE','Cannot get character past end of string','3-param ord,
multi-character string register, from end, OOB');
+  set S0,"ab"
+  ord I0,S0,-3
+  print I0
+  end
+ CODE
+
  output_is(<<CODE, <<OUTPUT, "if_s_ic");
   set S0, "I've told you once, I've told you twice..."
   if S0, OK1
***************
*** 554,560 ****
  ok 9
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "repeat");
   set S0, "x"

   repeat S1, S0, 12
--- 741,747 ----
  ok 9
  OUTPUT

! output_is(<<CODE, <<OUTPUT, "repeat_s_s|sc_i|ic");
   set S0, "x"

   repeat S1, S0, 12
***************
*** 596,601 ****
--- 783,794 ----
  >< done
  OUTPUT

+ output_is(<<'CODE','Cannot repeat with negative arg','repeat OOB');
+  repeat S0, "japh", -1
+  end
+ CODE
+
+
  # Set all string registers to values given by &$_[0](reg num)
  sub set_str_regs {
    my $code = shift;
***************
*** 623,643 ****
    while (@strings) {
      my $s1 = shift @strings;
      my $s2 = shift @strings;
!     my $arg;
!     $rt .= "    set S0, \"$s1\"\n";
!     if ($const) {
!       $arg = "\"$s2\"";
      }
      else {
!       $rt .= "    set S1, \"$s2\"\n";
!       $arg = "S1";
      }
      if (eval "\"$s1\" $op \"$s2\"") {
!       $rt .= "    $op S0, $arg, OK$i\n";
        $rt .= "    branch ERROR\n";
      }
      else {
!       $rt .= "    $op S0, $arg, ERROR\n";
      }
      $rt .= "OK$i:\n";
      $i++;
--- 816,849 ----
    while (@strings) {
      my $s1 = shift @strings;
      my $s2 = shift @strings;
!     my $arg1;
!     my $arg2;
!     if ($const == 3) {
!  $arg1 = "\"$s1\"";
!  $arg2 = "\"$s2\"";
!     }
!     elsif ($const == 2) {
!  $rt .= "    set S0, \"$s1\"\n";
!  $arg1 = "S0";
!  $arg2 = "\"$s2\"";
!     }
!     elsif ($const == 1) {
!  $rt .= "    set S0, \"$s2\"\n";
!  $arg1 = "\"$s1\"";
!  $arg2 = "S0";
      }
      else {
!  $rt .= "    set S0, \"$s1\"\n";
!  $rt .= "    set S1, \"$s2\"\n";
!  $arg1 = "S0";
!  $arg2 = "S1";
      }
      if (eval "\"$s1\" $op \"$s2\"") {
!       $rt .= "    $op $arg1, $arg2, OK$i\n";
        $rt .= "    branch ERROR\n";
      }
      else {
!       $rt .= "    $op $arg1, $arg2, ERROR\n";
      }
      $rt .= "OK$i:\n";
      $i++;

Re: Large string patch

Reply via email to