Just in time for 0.0.5 :)
We need this. Check it over first. I ran various test cases on it, will write
up the test scripts ASAP.
Implements the substr with replace variation as per the Perl semantics (I
think).
I'll commit after the 0.0.4 freeze if it looks correct.
It might need another pass to optimize it.
-Melvin
Index: core.ops
===================================================================
RCS file: /cvs/public/parrot/core.ops,v
retrieving revision 1.106
diff -u -r1.106 core.ops
--- core.ops 19 Mar 2002 00:33:36 -0000 1.106
+++ core.ops 19 Mar 2002 23:42:26 -0000
@@ -1489,13 +1489,22 @@
=item B<substr>(out STR, in STR, in INT, in INT)
+=item B<substr>(out STR, in STR, in INT, in INT, in STR)
+
Set $1 to the portion of $2 starting at (zero-based) character position
$3 and having
length $4.
+Optionally pass in string $5 for replacement.
+
=cut
inline op substr(out STR, in STR, in INT, in INT) {
$1 = string_substr(interpreter, $2, $3, $4, &$1);
+ goto NEXT();
+}
+
+inline op substr(out STR, in STR, in INT, in INT, in STR) {
+ $1 = string_replace(interpreter, $2, $3, $4, $5, &$1);
goto NEXT();
}
Index: string.c
===================================================================
RCS file: /cvs/public/parrot/string.c,v
retrieving revision 1.58
diff -u -r1.58 string.c
--- string.c 17 Mar 2002 06:44:41 -0000 1.58
+++ string.c 19 Mar 2002 23:42:28 -0000
@@ -67,6 +67,26 @@
return s;
}
+/*=for api string string_grow
+ * grow the string buffer by addlen bytes
+ */
+STRING *
+string_grow(struct Parrot_Interp * interpreter, STRING * s, INTVAL addlen) {
+ void * newbuf;
+ INTVAL copysize = s->bufused;
+ if(addlen < 0)
+ copysize += addlen;
+ if(copysize <= 0)
+ return s;
+ /* Don't check buflen, if we are here, we already checked */
+ newbuf = Parrot_allocate(interpreter, s->buflen + addlen);
+ mem_sys_memcopy(newbuf, s->bufstart, (UINTVAL)copysize);
+ free_buffer(s->bufstart);
+ s->bufstart = newbuf;
+ s->buflen += addlen;
+ return s;
+}
+
/*=for api string string_destroy
* free the strings memory
*/
@@ -391,6 +411,126 @@
if (d != NULL) {
*d = dest;
}
+ return dest;
+}
+
+/*
+ * This should follow the Perl semantics for:
+ * substr EXPR, OFFSET, LENGTH, REPLACEMENT
+ * Replace substring of src with rep, returning what was there before.
+ * Replacing a slice with a longer string grows the string;
+ * a shorter string shrinks it.
+ * Replacing 2 past the end of the string is undefined.
+ * however replacing 1 past does a concat.
+ * A negative offset is allowed to replace from the end.
+ */
+STRING *
+string_replace(struct Parrot_Interp *interpreter, STRING *src,
+ INTVAL offset, INTVAL length, const STRING *rep, STRING **d)
+{
+ STRING *dest;
+ UINTVAL substart_off; /* Offset from start of string to our
+ * piece */
+ UINTVAL subend_off; /* Offset from start of string to the
+ * end of our piece */
+ UINTVAL true_offset;
+ UINTVAL true_length;
+ UINTVAL new_length;
+ UINTVAL new_size;
+ INTVAL diff;
+
+ true_offset = (UINTVAL)offset;
+ true_length = (UINTVAL)length;
+
+ if(rep->encoding != src->encoding || rep->type != src->type)
+ rep = string_transcode(interpreter, rep, src->encoding, src->type,
NULL);
+
+ /* abs(-offset) may not be > strlen-1 */
+ if (offset < 0) {
+ true_offset = (UINTVAL)(src->strlen + offset);
+ }
+
+ /* Can replace 1 past end of string which is technically outside the
string
+ * but is same as a concat().
+ * Only give exception if caller trys to replace end of string + 2
+ */
+ if (true_offset > src->strlen) {
+ internal_exception(SUBSTR_OUT_OF_STRING,
+ "Can only replace inside string or index after
end of string");
+ }
+ if (true_length > (src->strlen - true_offset)) {
+ true_length = (UINTVAL)(src->strlen - true_offset);
+ }
+
+ /* Save the substring that is replaced for the return value */
+ substart_off = (char *)src->encoding->skip_forward(src->bufstart,
+ true_offset) -
+ (char *)src->bufstart;
+ subend_off =
+ (char *)src->encoding->skip_forward((char *)src->bufstart +
+ substart_off,
+ true_length) -
+ (char *)src->bufstart;
+
+ dest =
+ string_make(interpreter, NULL, true_length * src->encoding->max_bytes,
+ src->encoding, 0, src->type);
+
+ if (subend_off < substart_off) {
+ internal_exception(SUBSTR_OUT_OF_STRING,
+ "subend somehow is less than substart");
+ }
+
+ mem_sys_memcopy(dest->bufstart, (char *)src->bufstart + substart_off,
+ (unsigned)(subend_off - substart_off));
+ dest->bufused = subend_off - substart_off;
+ dest->strlen = true_length;
+
+ if (d != NULL) {
+ *d = dest;
+ }
+
+ /* Now do the replacement */
+
+ /*
+ * If the replacement string fits inside the original substring
+ * don't create a new string, just pack it.
+ */
+ diff = dest->bufused - rep->bufused;
+
+ if(diff >= 0
+ || (INTVAL)(src->bufused - src->buflen) <= diff) {
+
+ mem_sys_memcopy((char*)src->bufstart + substart_off,
+ rep->bufstart, rep->bufused);
+ if(diff > 0) {
+ mem_sys_memmove((char*)src->bufstart + substart_off +
rep->bufused,
+ (char*)src->bufstart + subend_off,
+ src->buflen - (subend_off - diff));
+ src->bufused -= diff;
+ (void)string_compute_strlen(src);
+ }
+ }
+ /*
+ * Replacement is larger than avail buffer, grow the string
+ */
+ else {
+ /* diff is negative here, make it positive */
+ diff = -(diff);
+ string_grow(interpreter, src, diff);
+
+ /* Move the end of old string that isn't replaced to new offset
first */
+ mem_sys_memmove((char*)src->bufstart + subend_off + diff,
+ (char*)src->bufstart + subend_off,
+ src->buflen - subend_off);
+ /* Copy the replacement in */
+ mem_sys_memcopy((char *)src->bufstart + substart_off, rep->bufstart,
+ rep->bufused);
+ src->bufused += diff;
+ (void)string_compute_strlen(src);
+ }
+
+ /* src is modified, now return the original substring */
return dest;
}
Index: include/parrot/memory.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/memory.h,v
retrieving revision 1.9
diff -u -r1.9 memory.h
--- include/parrot/memory.h 15 Mar 2002 19:45:00 -0000 1.9
+++ include/parrot/memory.h 19 Mar 2002 23:42:28 -0000
@@ -30,6 +30,7 @@
#define mem_allocate_new_stash() NULL
#define mem_allocate_new_stack() NULL
#define mem_sys_memcopy memcpy
+#define mem_sys_memmove memmove
#define Parrot_mark_used_memory(a, b, c) mem_realloc(a, b, c, c)
#endif
Index: include/parrot/string_funcs.h
===================================================================
RCS file: /cvs/public/parrot/include/parrot/string_funcs.h,v
retrieving revision 1.4
diff -u -r1.4 string_funcs.h
--- include/parrot/string_funcs.h 4 Mar 2002 03:17:21 -0000 1.4
+++ include/parrot/string_funcs.h 19 Mar 2002 23:42:28 -0000
@@ -23,6 +23,8 @@
STRING *Parrot_string_chopn(STRING *, INTVAL);
STRING *Parrot_string_substr(Parrot, const STRING *, INTVAL,
INTVAL, STRING **);
+STRING *Parrot_string_replace(Parrot, STRING *, INTVAL, INTVAL,
+ const STRING *, STRING **);
INTVAL Parrot_string_compare(Parrot, const STRING *, const STRING *);
Parrot_Bool Parrot_string_bool(const STRING *);
@@ -31,6 +33,7 @@
INTVAL Parrot_string_ord(const STRING *, INTVAL idx);
FLOATVAL Parrot_string_to_num(const STRING *);
INTVAL Parrot_string_to_int(const STRING *);
+STRING * Parrot_string_grow(struct Parrot_Interp * interpreter, STRING *
s, INTVAL addlen);
void Parrot_string_destroy(STRING *);
STRING *Parrot_string_make(struct Parrot_Interp *, const void *buffer,
UINTVAL buflen, const ENCODING *, UINTVAL flags,
@@ -49,6 +52,7 @@
#define string_repeat Parrot_string_repeat
#define string_chopn Parrot_string_chopn
#define string_substr Parrot_string_substr
+#define string_replace Parrot_string_replace
#define string_compare Parrot_string_compare
#define string_bool Parrot_string_bool
@@ -56,6 +60,7 @@
#define string_ord Parrot_string_ord
#define string_to_num Parrot_string_to_num
#define string_to_int Parrot_string_to_int
+#define string_grow Parrot_string_grow
#define string_destroy Parrot_string_destroy
#define string_make Parrot_string_make
#define string_copy Parrot_string_copy