This is an automated email from the git hooks/post-receive script. henrich pushed a commit to branch debian/sid in repository jruby-joni.
commit f861a46f14c5e243716a03fe596078d58ea2d0ab Author: Marcin Mielzynski <l...@gazeta.pl> Date: Thu Jan 4 17:43:19 2018 +0100 fix ignore case --- src/org/joni/Analyser.java | 3 ++- src/org/joni/OptExactInfo.java | 36 ++++++++++++++++++------------------ src/org/joni/Regex.java | 2 +- test/org/joni/test/TestU8.java | 21 +++++++++++++++++++++ 4 files changed, 42 insertions(+), 20 deletions(-) diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java index 98a4dd2..b267e85 100644 --- a/src/org/joni/Analyser.java +++ b/src/org/joni/Analyser.java @@ -2022,6 +2022,7 @@ final class Analyser extends Parser { if (!sn.isAmbig()) { opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc); + opt.exb.ignoreCase = 0; if (slen > 0) { opt.map.addChar(sn.bytes[sn.p], enc); @@ -2035,7 +2036,7 @@ final class Analyser extends Parser { max = enc.maxLengthDistance() * n; } else { opt.exb.concatStr(sn.bytes, sn.p, sn.end, sn.isRaw(), enc); - opt.exb.ignoreCase = true; + opt.exb.ignoreCase = 1; if (slen > 0) { opt.map.addCharAmb(sn.bytes, sn.p, sn.end, enc, oenv.caseFoldFlag); diff --git a/src/org/joni/OptExactInfo.java b/src/org/joni/OptExactInfo.java index ef9fb78..2ad4f27 100644 --- a/src/org/joni/OptExactInfo.java +++ b/src/org/joni/OptExactInfo.java @@ -26,10 +26,8 @@ final class OptExactInfo { final MinMaxLen mmd = new MinMaxLen(); final OptAnchorInfo anchor = new OptAnchorInfo(); - boolean reachEnd; - boolean ignoreCase; - + int ignoreCase; /* -1: unset, 0: case sensitive, 1: ignore case */ final byte bytes[] = new byte[OPT_EXACT_MAXLEN]; int length; @@ -40,9 +38,8 @@ final class OptExactInfo { void clear() { mmd.clear(); anchor.clear(); - reachEnd = false; - ignoreCase = false; + ignoreCase = -1; length = 0; } @@ -57,19 +54,20 @@ final class OptExactInfo { } void concat(OptExactInfo other, Encoding enc) { - if (!ignoreCase && other.ignoreCase) { - if (length >= other.length) return; /* avoid */ - ignoreCase = true; + if (ignoreCase < 0) { + ignoreCase = other.ignoreCase; + } else if (ignoreCase != other.ignoreCase) { + return; } int p = 0; // add->s; int end = p + other.length; int i; - for (i=length; p < end;) { + for (i = length; p < end;) { int len = enc.length(other.bytes, p, end); if (i + len > OPT_EXACT_MAXLEN) break; - for (int j=0; j<len && p < end; j++) { + for (int j = 0; j < len && p < end; j++) { bytes[i++] = other.bytes[p++]; // arraycopy or even don't copy anything ?? } } @@ -79,21 +77,19 @@ final class OptExactInfo { OptAnchorInfo tmp = new OptAnchorInfo(); tmp.concat(anchor, other.anchor, 1, 1); - if (!other.reachEnd) tmp.rightAnchor = 0; + if (!reachEnd) tmp.rightAnchor = 0; anchor.copy(tmp); } - // ?? raw is not used here void concatStr(byte[]lbytes, int p, int end, boolean raw, Encoding enc) { int i; for (i = length; p < end && i < OPT_EXACT_MAXLEN;) { int len = enc.length(lbytes, p, end); if (i + len > OPT_EXACT_MAXLEN) break; - for (int j=0; j<len && p < end; j++) { + for (int j = 0; j < len && p < end; j++) { bytes[i++] = lbytes[p++]; } } - length = i; } @@ -125,7 +121,11 @@ final class OptExactInfo { if (!other.reachEnd || i<other.length || i<length) reachEnd = false; length = i; - ignoreCase |= other.ignoreCase; + if (ignoreCase < 0) { + ignoreCase = other.ignoreCase; + } else if (other.ignoreCase >= 0) { + ignoreCase |= other.ignoreCase; + } anchor.altMerge(other.anchor); @@ -151,8 +151,8 @@ final class OptExactInfo { if (alt.length > 1) v2 += 5; } - if (!ignoreCase) v1 *= 2; - if (!alt.ignoreCase) v2 *= 2; + if (ignoreCase <= 0) v1 *= 2; + if (alt.ignoreCase <= 0) v2 *= 2; if (mmd.compareDistanceValue(alt.mmd, v1, v2) > 0) copy(alt); } @@ -162,7 +162,7 @@ final class OptExactInfo { int compare(OptMapInfo m) { if (m.value <= 0) return -1; - int ve = COMP_EM_BASE * length * (ignoreCase ? 1 : 2); + int ve = COMP_EM_BASE * length * (ignoreCase > 0 ? 1 : 2); int vm = COMP_EM_BASE * 5 * 2 / m.value; return mmd.compareDistanceValue(m.mmd, ve, vm); diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java index 92ffdc0..eb281d4 100644 --- a/src/org/joni/Regex.java +++ b/src/org/joni/Regex.java @@ -304,7 +304,7 @@ public final class Regex { exactP = 0; exactEnd = e.length; - if (e.ignoreCase) { + if (e.ignoreCase > 0) { // encodings won't return toLowerTable for case insensitive search if it's not safe to use it directly searchAlgorithm = enc.toLowerCaseTable() != null ? SearchAlgorithm.SLOW_IC_SB : SearchAlgorithm.SLOW_IC; } else { diff --git a/test/org/joni/test/TestU8.java b/test/org/joni/test/TestU8.java index 9a5bdea..3ec6385 100755 --- a/test/org/joni/test/TestU8.java +++ b/test/org/joni/test/TestU8.java @@ -157,6 +157,27 @@ public class TestU8 extends Test { x2s("\\A\\X\\X\\z", "\r\u0308", 0, 3); x2s("\\A\\X\\X\\z", "\n\u0308", 0, 3); + x2s("[0-9-a]+", " 0123456789-a ", 1, 13); + x2s("[0-9-\\s]+", " 0123456789-a ", 0, 12); + x2s("[0-9-あ\\\\/\u0001]+", " 0123456789-あ\\/\u0001 ", 1, 18); + x2s("[a-b-]+", "ab-", 0, 3); + x2s("[a-b-&&-]+", "ab-", 2, 3); + x2s("(?i)[a[b-あ]]+", "abあ", 0, 5); + x2s("(?i)[\\d[:^graph:]]+", "0あ", 0, 1); + x2s("(?ia)[\\d[:^print:]]+", "0あ", 0, 4); + + x2s("(?i:a) B", "a B", 0, 3); + x2s("(?i:a )B", "a B", 0, 3); + x2s("B (?i:a)", "B a", 0, 3); + x2s("B(?i: a)", "B a", 0, 3); + + x2s("(?a)[\\p{Space}\\d]", "\u00a0", 0, 2); + x2s("(?a)[\\d\\p{Space}]", "\u00a0", 0, 2); + ns("(?a)[^\\p{Space}\\d]", "\u00a0"); + ns("(?a)[^\\d\\p{Space}]", "\u00a0"); + x2s("(?d)[[:space:]\\d]", "\u00a0", 0, 2); + ns("(?d)[^\\d[:space:]]", "\u00a0"); + x2s("\\p{In_Unified_Canadian_Aboriginal_Syllabics_Extended}+", "\u18B0\u18FF", 0, 6); x2s("(?i)\u1ffc", "\u2126\u1fbe", 0, 6); x2s("(?i)\u1ffc", "\u1ff3", 0, 3); -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git _______________________________________________ pkg-java-commits mailing list pkg-java-comm...@lists.alioth.debian.org http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-java-commits