Author: chromatic
Date: Tue Feb 26 12:23:00 2008
New Revision: 26075

Modified:
   trunk/src/charset/unicode.c
   trunk/src/encodings/utf8.c

Log:
[src] Improved performance of utf8_set_position() by about ten percent.

Avoided calling it from the two hottest paths in NQP when the iterator does
not need to move (a zero position, offset, or count).

The result is that generating Rakudo's parser actions is a whopping 2.5%
faster.  Further improvements may have to come from upgrading to fixed-width
strings and optimizing NQP to avoid character position counts.

Modified: trunk/src/charset/unicode.c
==============================================================================
--- trunk/src/charset/unicode.c (original)
+++ trunk/src/charset/unicode.c Tue Feb 26 12:23:00 2008
@@ -811,7 +811,10 @@
 
     PARROT_ASSERT(source_string);
     ENCODING_ITER_INIT(interp, source_string, &iter);
-    iter.set_position(interp, &iter, pos);
+
+    if (pos)
+        iter.set_position(interp, &iter, pos);
+
     end = source_string->strlen < end ? source_string->strlen : end;
     for (; pos < end; ++pos) {
         codepoint = iter.get_and_advance(interp, &iter);

Modified: trunk/src/encodings/utf8.c
==============================================================================
--- trunk/src/encodings/utf8.c  (original)
+++ trunk/src/encodings/utf8.c  Tue Feb 26 12:23:00 2008
@@ -446,20 +446,21 @@
 static void
 utf8_set_position(SHIM_INTERP, ARGMOD(String_iter *i), UINTVAL pos)
 {
-    UINTVAL charpos     = 0;
     const utf8_t *u8ptr = (const utf8_t *)i->str->strstart;
 
     /* start from last known charpos, if we can */
     if (i->charpos <= pos) {
-        charpos = i->charpos;
-        u8ptr += i->bytepos;
+        const UINTVAL old_pos = pos;
+        pos       -= i->charpos;
+        u8ptr     += i->bytepos;
+        i->charpos = old_pos;
     }
+    else
+        i->charpos = pos;
 
-    while (charpos < pos) {
+    while (pos-- > 0)
         u8ptr += UTF8SKIP(u8ptr);
-        charpos++;
-    }
-    i->charpos = pos;
+
     i->bytepos = (const char *)u8ptr - (const char *)i->str->strstart;
 }
 
@@ -648,17 +649,26 @@
 static STRING *
 get_codepoints(PARROT_INTERP, ARGIN(STRING *src), UINTVAL offset, UINTVAL count)
 {
-    String_iter iter;
-    UINTVAL start;
+
     STRING * const return_string = Parrot_make_COW_reference(interp, src);
+    String_iter    iter;
+    UINTVAL        start;
+
     iter_init(interp, src, &iter);
-    iter.set_position(interp, &iter, offset);
-    start = iter.bytepos;
-    return_string->strstart = (char *)return_string->strstart + start ;
-    iter.set_position(interp, &iter, offset + count);
-    return_string->bufused = iter.bytepos - start;
-    return_string->strlen = count;
-    return_string->hashval = 0;
+
+    if (offset)
+        iter.set_position(interp, &iter, offset);
+
+    start                   = iter.bytepos;
+    return_string->strstart = (char *)return_string->strstart + start;
+
+    if (count)
+        iter.set_position(interp, &iter, offset + count);
+
+    return_string->bufused  = iter.bytepos - start;
+    return_string->strlen   = count;
+    return_string->hashval  = 0;
+
     return return_string;
 }
 

Reply via email to