Here's a fun algorithm for you. How do you find a substring within another
string?
I really want to apply something like this patch. I'd like to take advantage
of libc functions to do the hard work, rather than walking through a string,
byte by byte. Unfortunately, libc functions think \0 marks the end of a
string always and everywhere. Silly libc.
This patch passes all of the tests and is slightly more efficient. I suspect
that there are true gains available by moving the strstr() call inside the
loop appropriately, instead of always trying to match at the next position.
-- c
=== src/utils.c
==================================================================
--- src/utils.c (revision 4086)
+++ src/utils.c (local)
@@ -560,20 +560,31 @@
Parrot_byte_index(Interp *interp, const STRING *base /*NN*/,
const STRING *search /*NN*/, UINTVAL start_offset)
{
- const INTVAL searchlen = search->strlen;
const char * const search_start = search->strstart;
- const INTVAL max_possible_offset = (base->strlen - search->strlen);
- INTVAL current_offset;
- for (current_offset = start_offset; current_offset <= max_possible_offset;
- current_offset++) {
- const char * const base_start = (char *)base->strstart + current_offset;
- if (memcmp(base_start, search_start, searchlen) == 0) {
- return current_offset;
+ if (strlen(base->strstart) == base->strlen) {
+ const char * const pos =
+ strstr((char *)base->strstart + start_offset, search_start);
+
+ if (pos)
+ return (INTVAL)(pos - base->strstart);
+ }
+
+ {
+ const INTVAL searchlen = search->strlen;
+ const INTVAL max_possible_offset = (base->strlen - search->strlen);
+ INTVAL current_offset = start_offset;
+
+ for (; current_offset <= max_possible_offset; current_offset++) {
+ const char * const base_start =
+ (char *)base->strstart + current_offset;
+
+ if (memcmp(base_start, search_start, searchlen) == 0)
+ return current_offset;
}
+
+ return -1;
}
-
- return -1;
}
PARROT_API