Fix performance issue with String#[Fixnum] on encoded strings -------------------------------------------------------------
Key: JRUBY-5411 URL: http://jira.codehaus.org/browse/JRUBY-5411 Project: JRuby Issue Type: Improvement Components: Core Classes/Modules Affects Versions: JRuby 1.6RC1 Reporter: Charles Oliver Nutter Fix For: JRuby 1.6RC2 This bug, http://redmine.ruby-lang.org/issues/show/4298 , shows how getting the nth character from a string causes the string's length to be recalculated each time. Ruby-core fixed this by not checking the length and by modifying str_utf8_nth to rewrite the "beg" reference, as seen in this commit: https://github.com/ruby/ruby/commit/b0eaf0f621353b9075ff5ad8e4cd663da6a36dce We should try to fix it in a similar way. My first attempt at a patch doesn't do the beg rewriting, but it does improve the performance and eliminates the full-string scan each time: {noformat} diff --git a/src/org/jruby/RubyString.java b/src/org/jruby/RubyString.java index 133fd7d..5b2149c 100644 --- a/src/org/jruby/RubyString.java +++ b/src/org/jruby/RubyString.java @@ -3286,10 +3286,11 @@ public class RubyString extends RubyObject implements EncodingCapable { beg += strLength(enc); if (beg < 0) return runtime.getNil(); } - } else if (beg > 0 && beg > strLength(enc)) { + } else if (beg > 0 && beg > value.getRealSize()) { return runtime.getNil(); } if (len == 0) { + if (beg > strLength(enc)) return runtime.getNil(); p = 0; } else if (isCodeRangeValid() && enc instanceof UTF8Encoding) { p = StringSupport.utf8Nth(bytes, s, end, beg); @@ -3298,8 +3299,7 @@ public class RubyString extends RubyObject implements EncodingCapable { int w = enc.maxLength(); p = s + beg * w; if (p > end) { - p = end; - len = 0; + return runtime.getNil(); } else if (len * w > end - p) { len = end - p; } else { {noformat} I'm not sure whether this is a safe change. -- This message is automatically generated by JIRA. 
- If you think it was sent incorrectly, contact one of the administrators: http://jira.codehaus.org/secure/Administrators.jspa - For more information on JIRA, see: http://www.atlassian.com/software/jira --------------------------------------------------------------------- To unsubscribe from this list, please visit: http://xircles.codehaus.org/manage_email