Oscar Nierstrasz reported a regex-intensive script that is 10 times slower in Smalltalk than in Ruby.

This was particularly low-hanging fruit: Regex objects are actually no different from strings; they are simply cached because they are created read-only. So, we can extend the caching to any read-only string (including a String literal). This removes the need to send asRegex to literals -- it should only be necessary when using a variable as the regular expression.

Before:

st> Time millisecondsToRun: [ 100000 timesRepeat: [ 'a.c' ~ 'abc' ] ]
2200
st> Time millisecondsToRun: [ 100000 timesRepeat: [ 'abc' ~ 'a.c' ] ]
2523
st> Time millisecondsToRun: [ 100000 timesRepeat: [ 'abc' ~ ##('a.c' asRegex) ] ]
1666

After:

st> Time millisecondsToRun: [ 100000 timesRepeat: [ 'a.c' ~ 'abc' ] ]
1590
st> Time millisecondsToRun: [ 100000 timesRepeat: [ 'abc' ~ 'a.c' ] ]
1732
st> Time millisecondsToRun: [ 100000 timesRepeat: [ 'abc' ~ ##('a.c' asRegex) ] ]
1609

(Oscar, another change to make in your script is to move the temporaries outside the whileXxxx: blocks, because declaring temporaries inside a block prevents it from being inlined).

Paolo
--- orig/libgst/re.c
+++ mod/libgst/re.c
@@ -154,13 +154,7 @@ lookupRegex (OOP patternOOP, struct pre_
   int i;
   RegexCaching result;
 
-  if (!regexClassOOP)
-    {
-      regexClassOOP = _gst_class_name_to_oop ("Regex");
-      resultsClassOOP = _gst_class_name_to_oop ("Kernel.MatchingRegexResults");
-    }
-
-  if (OOP_CLASS (patternOOP) != regexClassOOP)
+  if (!IS_OOP_READONLY (patternOOP))
     {
       *pRegex = allocateNewRegex ();
       return REGEX_NOT_CACHED;
@@ -218,7 +212,7 @@ _gst_re_make_cacheable (OOP patternOOP)
       resultsClassOOP = _gst_class_name_to_oop ("Kernel.MatchingRegexResults");
     }
 
-  if (OOP_CLASS (patternOOP) == regexClassOOP)
+  if (!IS_OOP_READONLY (patternOOP))
     return patternOOP;
 
   /* Search in the cache */
@@ -285,6 +279,12 @@ make_re_results (OOP srcOOP, struct pre_
   if (!regs->beg || regs->beg[0] == -1)
     return _gst_nil_oop;
 
+  if (!regexClassOOP)
+    {
+      regexClassOOP = _gst_class_name_to_oop ("Regex");
+      resultsClassOOP = _gst_class_name_to_oop ("Kernel.MatchingRegexResults");
+    }
+
   resultsOOP = _gst_object_alloc (resultsClassOOP, 0);
   results = (gst_registers) OOP_TO_OBJ (resultsOOP);
   results->subjectOOP = srcOOP;
_______________________________________________
help-smalltalk mailing list
[email protected]
http://lists.gnu.org/mailman/listinfo/help-smalltalk

Reply via email to