This is an automated email from the git hooks/post-receive script. henrich pushed a commit to branch debian/sid in repository jruby-joni.
commit 5572dd33a707da6d4e745b29d973e944d9db8eb1 Author: Marcin Mielzynski <[email protected]> Date: Sun Feb 19 23:47:37 2012 +0100 Experimental optimization by using string templates outside of bytecode stream. In most cases there will be no copies at all instead of converting strings into integer array. --- src/org/joni/Analyser.java | 2 ++ src/org/joni/ArrayCompiler.java | 37 ++++++++++++++++++++++++++------ src/org/joni/ByteCodeMachine.java | 32 ++++++++++++++++++++++------ src/org/joni/ByteCodePrinter.java | 43 ++++++++++++++++++++++++++++++++------ src/org/joni/Config.java | 2 ++ src/org/joni/Regex.java | 4 ++++ src/org/joni/ast/StringNode.java | 2 +- src/org/joni/constants/OPSize.java | 1 + 8 files changed, 104 insertions(+), 19 deletions(-) diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java index 57bc6fe..b8995ce 100644 --- a/src/org/joni/Analyser.java +++ b/src/org/joni/Analyser.java @@ -172,7 +172,9 @@ final class Analyser extends Parser { if (Config.DEBUG_COMPILE) { if (Config.USE_NAMED_GROUP) Config.log.print(regex.nameTableToString()); Config.log.println("stack used: " + regex.stackNeeded); + if (Config.USE_STRING_TEMPLATES) Config.log.print(" templates: " + regex.templateNum); Config.log.println(new ByteCodePrinter(regex).byteCodeListToString()); + } // DEBUG_COMPILE regex.state = RegexState.NORMAL; diff --git a/src/org/joni/ArrayCompiler.java b/src/org/joni/ArrayCompiler.java index 6193e5d..f863a3f 100644 --- a/src/org/joni/ArrayCompiler.java +++ b/src/org/joni/ArrayCompiler.java @@ -105,6 +105,10 @@ final class ArrayCompiler extends Compiler { op == OPCode.EXACTN_IC_SB; } + private boolean opTemplated(int op) { + return op == OPCode.EXACTN || op == OPCode.EXACTMB2N; + } + private int selectStrOpcode(int mbLength, int strLength, boolean ignoreCase) { int op; @@ -172,13 +176,16 @@ final class ArrayCompiler extends Compiler { private int addCompileStringlength(byte[]bytes, int p, int mbLength, int strLength, boolean ignoreCase) { int op = selectStrOpcode(mbLength, strLength, ignoreCase); - int len = OPSize.OPCODE; - if (op == OPCode.EXACTMBN) len += OPSize.LENGTH; - if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH; - - len += mbLength * strLength; + if (Config.USE_STRING_TEMPLATES && opTemplated(op)) { + // string length, template index, template string pointer + len += OPSize.LENGTH + OPSize.INDEX + OPSize.INDEX; + } else { + if (op == OPCode.EXACTMBN) len += OPSize.LENGTH; + if (isNeedStrLenOpExact(op)) len += OPSize.LENGTH; + len += mbLength * strLength; + } return len; } @@ -196,7 +203,14 @@ final class ArrayCompiler extends Compiler { addLength(strLength); } } - addBytes(bytes, p, mbLength * strLength); + + if (Config.USE_STRING_TEMPLATES && opTemplated(op)) { + addInt(regex.templateNum); + addInt(p); + addTemplate(bytes); + } else { + addBytes(bytes, p, mbLength * strLength); + } } private int compileLengthStringNode(Node node) { @@ -1234,4 +1248,15 @@ final class ArrayCompiler extends Compiler { addOpcode(opcode); addOption(option); } + + private void addTemplate(byte[]bytes) { + if (regex.templateNum == 0) { + regex.templates = new byte[2][]; + } else if (regex.templateNum == regex.templates.length) { + byte[][]tmp = new byte[regex.templateNum * 2][]; + System.arraycopy(regex.templates, 0, tmp, 0, regex.templateNum); + regex.templates = tmp; + } + regex.templates[regex.templateNum++] = bytes; + } } diff --git a/src/org/joni/ByteCodeMachine.java b/src/org/joni/ByteCodeMachine.java index bb2b18d..f36a2ce 100644 --- a/src/org/joni/ByteCodeMachine.java +++ b/src/org/joni/ByteCodeMachine.java @@ -473,7 +473,15 @@ class ByteCodeMachine extends StackMachine { int tlen = code[ip++]; if (s + tlen > range) {opFail(); return;} - while (tlen-- > 0) if (code[ip++] != bytes[s++]) {opFail(); return;} + if (Config.USE_STRING_TEMPLATES) { + byte[]bs = regex.templates[code[ip++]]; + int ps = code[ip++]; + + while (tlen-- > 0) if (bs[ps++] != bytes[s++]) {opFail(); return;} + + } else { + while (tlen-- > 0) if (code[ip++] != bytes[s++]) {opFail(); return;} + } sprev = s - 1; } @@ -520,11 +528,23 @@ class ByteCodeMachine extends StackMachine { int tlen = code[ip++]; if (tlen * 2 > range) {opFail(); return;} - while(tlen-- > 0) { - if (code[ip] != bytes[s]) {opFail(); return;} - ip++; s++; - if (code[ip] != bytes[s]) {opFail(); return;} - ip++; s++; + if (Config.USE_STRING_TEMPLATES) { + byte[]bs = regex.templates[code[ip++]]; + int ps = code[ip++]; + + while(tlen-- > 0) { + if (bs[ps] != bytes[s]) {opFail(); return;} + ps++; s++; + if (bs[ps] != bytes[s]) {opFail(); return;} + ps++; s++; + } + } else { + while(tlen-- > 0) { + if (code[ip] != bytes[s]) {opFail(); return;} + ip++; s++; + if (code[ip] != bytes[s]) {opFail(); return;} + ip++; s++; + } } sprev = s - 2; } diff --git a/src/org/joni/ByteCodePrinter.java b/src/org/joni/ByteCodePrinter.java index 0a23211..4cddca3 100644 --- a/src/org/joni/ByteCodePrinter.java +++ b/src/org/joni/ByteCodePrinter.java @@ -27,8 +27,9 @@ import org.joni.constants.OPSize; import org.joni.exception.InternalException; class ByteCodePrinter { - int[]code; - int codeLength; + final int[]code; + final int codeLength; + final byte[][] templates; Object[]operands; int operantCount; @@ -40,6 +41,8 @@ class ByteCodePrinter { codeLength = regex.codeLength; operands = regex.operands; operantCount = regex.operandLength; + + templates = regex.templates; enc = regex.enc; warnings = regex.warnings; } @@ -53,16 +56,28 @@ class ByteCodePrinter { while (len-- > 0) sb.append(new String(new byte[]{(byte)code[s++]})); } + private void pStringFromTemplate(StringBuilder sb, int len, byte[]tm, int idx) { + sb.append(":T:"); + while (len-- > 0) sb.append(new String(new byte[]{tm[idx++]})); + } + private void pLenString(StringBuilder sb, int len, int mbLen, int s) { int x = len * mbLen; sb.append(":" + len + ":"); while (x-- > 0) sb.append(new String(new byte[]{(byte)code[s++]})); } + private void pLenStringFromTemplate(StringBuilder sb, int len, int mbLen, byte[]tm, int idx) { + int x = len * mbLen; + sb.append(":T:" + len + ":"); + while (x-- > 0) sb.append(new String(new byte[]{(byte)tm[idx++]})); + } + public int compiledByteCodeToString(StringBuilder sb, int bp) { int len, n, mem, addr, scn, cod; BitSet bs; CClassNode cc; + int tm, idx; sb.append("[" + OPCode.OpCodeNames[code[bp]]); int argType = OPCode.OpCodeArgTypes[code[bp]]; @@ -136,8 +151,16 @@ class ByteCodePrinter { case OPCode.EXACTN: len = code[bp]; bp += OPSize.LENGTH; - pLenString(sb, len, 1, bp); - bp += len; + if (Config.USE_STRING_TEMPLATES) { + tm = code[bp]; + bp += OPSize.INDEX; + idx = code[bp]; + bp += OPSize.INDEX; + pLenStringFromTemplate(sb, len, 1, templates[tm], idx); + } else { + pLenString(sb, len, 1, bp); + bp += len; + } break; case OPCode.EXACTMB2N1: @@ -158,8 +181,16 @@ class ByteCodePrinter { case OPCode.EXACTMB2N: len = code[bp]; bp += OPSize.LENGTH; - pLenString(sb, len, 2, bp); - bp += len * 2; + if (Config.USE_STRING_TEMPLATES) { + tm = code[bp]; + bp += OPSize.INDEX; + idx = code[bp]; + bp += OPSize.INDEX; + pLenStringFromTemplate(sb, len, 2, templates[tm], idx); + } else { + pLenString(sb, len, 2, bp); + bp += len * 2; + } break; case OPCode.EXACTMB3N: diff --git a/src/org/joni/Config.java b/src/org/joni/Config.java index 5b9fdbc..dab3565 100644 --- a/src/org/joni/Config.java +++ b/src/org/joni/Config.java @@ -64,6 +64,8 @@ public interface Config extends org.jcodings.Config { final boolean DONT_OPTIMIZE = false; + final boolean USE_STRING_TEMPLATES = true; // use embeded string templates in Regex object as byte arrays instead of compiling them into int bytecode array + final int MAX_CAPTURE_HISTORY_GROUP = 31; diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java index 744d9d3..661ce2d 100644 --- a/src/org/joni/Regex.java +++ b/src/org/joni/Regex.java @@ -90,6 +90,9 @@ public final class Regex implements RegexState { int dMin; /* min-distance of exact or map */ int dMax; /* max-distance of exact or map */ + byte[][]templates; + int templateNum; + public Regex(CharSequence cs) { this(cs.toString()); } @@ -427,6 +430,7 @@ public final class Regex implements RegexState { s += "]\n"; } } + return s; } diff --git a/src/org/joni/ast/StringNode.java b/src/org/joni/ast/StringNode.java index fb83567..2857c94 100644 --- a/src/org/joni/ast/StringNode.java +++ b/src/org/joni/ast/StringNode.java @@ -99,7 +99,7 @@ public final class StringNode extends Node implements StringType { if ((bytes[i] & 0xff) >= 0x20 && (bytes[i] & 0xff) < 0x7f) { value.append((char)bytes[i]); } else { - value.append(String.format("0x%02x", bytes[i])); + value.append(String.format("[0x%02x]", bytes[i])); } } value.append("'"); diff --git a/src/org/joni/constants/OPSize.java b/src/org/joni/constants/OPSize.java index 05efbed..d5595ad 100644 --- a/src/org/joni/constants/OPSize.java +++ b/src/org/joni/constants/OPSize.java @@ -32,6 +32,7 @@ public interface OPSize { final int OPTION = 1; final int CODE_POINT = 1; final int POINTER = 1; + final int INDEX = 1; /* op-code + arg size */ -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git _______________________________________________ pkg-java-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-java-commits

