This is an automated email from the git hooks/post-receive script. henrich pushed a commit to branch debian/sid in repository jruby-joni.
commit 39fa758d7880e22cd65425c6c5ce273ab84076a8 Author: Hideki Yamane <[email protected]> Date: Wed Apr 29 23:22:30 2015 +0900 Imported Upstream version 2.1.6 --- pom.xml | 2 +- src/org/joni/Analyser.java | 33 ++-- src/org/joni/ArrayCompiler.java | 89 ++++++---- src/org/joni/ByteCodeMachine.java | 9 + src/org/joni/ByteCodePrinter.java | 8 + src/org/joni/Lexer.java | 37 +++- src/org/joni/Option.java | 40 +++-- src/org/joni/Parser.java | 128 +++++++++++++- src/org/joni/Regex.java | 7 +- src/org/joni/StackMachine.java | 5 +- src/org/joni/Syntax.java | 54 +++++- src/org/joni/Token.java | 13 +- src/org/joni/ast/AnchorNode.java | 38 ++-- src/org/joni/ast/CClassNode.java | 50 ++++-- src/org/joni/ast/CTypeNode.java | 9 +- src/org/joni/ast/EncloseNode.java | 25 +-- src/org/joni/constants/EncloseType.java | 1 + src/org/joni/constants/OPCode.java | 192 ++++++++++++--------- src/org/joni/constants/OPSize.java | 1 + src/org/joni/constants/RegexState.java | 28 --- src/org/joni/constants/SyntaxProperties.java | 17 +- src/org/joni/constants/TokenType.java | 3 + src/org/joni/exception/ErrorMessages.java | 1 + test/org/joni/test/TestA.java | 28 +++ .../org/joni/test/TestAU.java | 36 +++- test/org/joni/test/TestJoni.java | 10 ++ .../org/joni/test/TestPerl.java | 37 +++- test/org/joni/test/TestU8.java | 8 +- 28 files changed, 632 insertions(+), 277 deletions(-) diff --git a/pom.xml b/pom.xml index f2d345d..9ecfd33 100644 --- a/pom.xml +++ b/pom.xml @@ -4,7 +4,7 @@ <groupId>org.jruby.joni</groupId> <artifactId>joni</artifactId> <packaging>jar</packaging> - <version>2.1.5</version> + <version>2.1.6</version> <name>Joni</name> <description> Java port of Oniguruma: http://www.geocities.jp/kosako3/oniguruma diff --git a/src/org/joni/Analyser.java b/src/org/joni/Analyser.java index 9aa9acc..d871f77 100644 --- a/src/org/joni/Analyser.java +++ b/src/org/joni/Analyser.java @@ -51,7 +51,6 @@ import org.joni.ast.StringNode; import org.joni.constants.AnchorType; import org.joni.constants.EncloseType; import org.joni.constants.NodeType; -import org.joni.constants.RegexState; import org.joni.constants.StackPopLevel; import org.joni.constants.TargetInfo; @@ -62,8 +61,6 @@ final class Analyser extends Parser { } protected final void compile() { - regex.state = RegexState.COMPILING; - if (Config.DEBUG) { Config.log.println(regex.encStringToString(bytes, getBegin(), getEnd())); } @@ -173,13 +170,11 @@ final class Analyser extends Parser { if (Config.DEBUG_COMPILE) { if (Config.USE_NAMED_GROUP) Config.log.print(regex.nameTableToString()); - Config.log.println("stack used: " + regex.stackNeeded); + Config.log.println("stack used: " + regex.requireStack); if (Config.USE_STRING_TEMPLATES) Config.log.print("templates: " + regex.templateNum + "\n"); Config.log.println(new ByteCodePrinter(regex).byteCodeListToString()); } // DEBUG_COMPILE - - regex.state = RegexState.NORMAL; } private void noNameDisableMapFor_cosAlt(Node node, int[]map, Ptr counter) { @@ -225,10 +220,10 @@ final class Analyser extends Parser { private void noNameDisableMapFor_anchor(Node node, int[]map, Ptr counter) { AnchorNode an = (AnchorNode)node; switch (an.type) { - case AnchorNode.PREC_READ: - case AnchorNode.PREC_READ_NOT: - case AnchorNode.LOOK_BEHIND: - case AnchorNode.LOOK_BEHIND_NOT: + case AnchorType.PREC_READ: + case AnchorType.PREC_READ_NOT: + case AnchorType.LOOK_BEHIND: + case AnchorType.LOOK_BEHIND_NOT: an.setTarget(noNameDisableMap(an.target, map, counter)); } } @@ -267,6 +262,10 @@ final class Analyser extends Parser { break; case NodeType.ENCLOSE: + EncloseNode en = (EncloseNode)node; + if (en.type == EncloseType.CONDITION) { + en.regNum = map[en.regNum]; + } renumberByMap(((EncloseNode)node).target, map); break; @@ -383,6 +382,7 @@ final class Analyser extends Parser { case EncloseType.OPTION: case EncloseNode.STOP_BACKTRACK: + case EncloseNode.CONDITION: info = quantifiersMemoryInfo(en.target); break; @@ -498,6 +498,7 @@ final class Analyser extends Parser { case EncloseType.OPTION: case EncloseType.STOP_BACKTRACK: + case EncloseNode.CONDITION: min = getMinMatchLength(en.target); break; } // inner switch @@ -603,6 +604,7 @@ final class Analyser extends Parser { case EncloseType.OPTION: case EncloseType.STOP_BACKTRACK: + case EncloseNode.CONDITION: max = getMaxMatchLength(en.target); break; } // inner switch @@ -715,6 +717,7 @@ final class Analyser extends Parser { case EncloseType.OPTION: case EncloseType.STOP_BACKTRACK: + case EncloseNode.CONDITION: len = getCharLengthTree(en.target, level); break; } // inner switch @@ -938,6 +941,7 @@ final class Analyser extends Parser { case EncloseType.MEMORY: case EncloseType.STOP_BACKTRACK: + case EncloseNode.CONDITION: n = getHeadValueNode(en.target, exact); break; } // inner switch @@ -1890,6 +1894,14 @@ final class Analyser extends Parser { } break; + case EncloseNode.CONDITION: + if (Config.USE_NAMED_GROUP) { + if (!en.isNameRef() && env.numNamed > 0 && syntax.captureOnlyNamedGroup() && !isCaptureGroup(env.option)) { + newValueException(ERR_NUMBERED_BACKREF_OR_CALL_NOT_ALLOWED); + } + } + setupTree(en.target, state); + break; } // inner switch break; @@ -2203,6 +2215,7 @@ final class Analyser extends Parser { break; case EncloseType.STOP_BACKTRACK: + case EncloseType.CONDITION: optimizeNodeLeft(en.target, opt, oenv); break; } // inner switch diff --git a/src/org/joni/ArrayCompiler.java b/src/org/joni/ArrayCompiler.java index ac21d0e..62e44fc 100644 --- a/src/org/joni/ArrayCompiler.java +++ b/src/org/joni/ArrayCompiler.java @@ -96,6 +96,7 @@ final class ArrayCompiler extends Compiler { do { len = compileLengthTree(aln.car); if (aln.cdr != null) { + regex.requireStack = true; addOpcodeRelAddr(OPCode.PUSH, len + OPSize.JUMP); } compileTree(aln.car); @@ -161,6 +162,7 @@ final class ArrayCompiler extends Compiler { int savedNumNullCheck = regex.numNullCheck; if (emptyInfo != 0) { + regex.requireStack = true; addOpcode(OPCode.NULL_CHECK_START); addMemNum(regex.numNullCheck); /* NULL CHECK ID */ regex.numNullCheck++; @@ -419,6 +421,7 @@ final class ArrayCompiler extends Compiler { } private void compileRangeRepeatNode(QuantifierNode qn, int targetLen, int emptyInfo) { + regex.requireStack = true; int numRepeat = regex.numRepeat; addOpcode(qn.greedy ? OPCode.REPEAT : OPCode.REPEAT_NG); addMemNum(numRepeat); /* OP_REPEAT ID */ @@ -517,6 +520,7 @@ final class ArrayCompiler extends Compiler { @Override protected void compileCECQuantifierNode(QuantifierNode qn) { + regex.requireStack = true; boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; @@ -691,6 +695,7 @@ final class ArrayCompiler extends Compiler { @Override protected void compileNonCECQuantifierNode(QuantifierNode qn) { + regex.requireStack = true; boolean infinite = isRepeatInfinite(qn.upper); int emptyInfo = qn.targetEmptyInfo; @@ -858,6 +863,21 @@ final class ArrayCompiler extends Compiler { } break; + case EncloseType.CONDITION: + len = OPSize.CONDITION; + if (node.target.getType() == NodeType.ALT) { + ConsAltNode x = (ConsAltNode)node.target; + tlen = compileLengthTree(x.car); /* yes-node */ + len += tlen + OPSize.JUMP; + if (x.cdr == null) newInternalException(ERR_PARSER_BUG); + x = x.cdr; + tlen = compileLengthTree(x.car); /* no-node */ + len += tlen; + if (x.cdr != null) newSyntaxException(ERR_INVALID_CONDITION_PATTERN); + } else { + newInternalException(ERR_PARSER_BUG); + } + break; default: newInternalException(ERR_PARSER_BUG); return 0; // not reached @@ -872,6 +892,7 @@ final class ArrayCompiler extends Compiler { case EncloseType.MEMORY: if (Config.USE_SUBEXP_CALL) { if (node.isCalled()) { + regex.requireStack = true; addOpcode(OPCode.CALL); node.callAddr = codeLength + OPSize.ABSADDR + OPSize.JUMP; node.setAddrFixed(); @@ -888,6 +909,7 @@ final class ArrayCompiler extends Compiler { } // USE_SUBEXP_CALL if (bsAt(regex.btMemStart, node.regNum)) { + regex.requireStack = true; addOpcode(OPCode.MEMORY_START_PUSH); } else { addOpcode(OPCode.MEMORY_START); @@ -915,6 +937,7 @@ final class ArrayCompiler extends Compiler { break; case EncloseType.STOP_BACKTRACK: + regex.requireStack = true; if (node.isStopBtSimpleRepeat()) { QuantifierNode qn = (QuantifierNode)node.target; @@ -932,6 +955,27 @@ final class ArrayCompiler extends Compiler { } break; + case EncloseType.CONDITION: + addOpcode(OPCode.CONDITION); + addMemNum(node.regNum); + if (node.target.getType() == NodeType.ALT) { + ConsAltNode x = (ConsAltNode)node.target; + len = compileLengthTree(x.car); /* yes-node */ + if (x.cdr == null) newInternalException(ERR_PARSER_BUG); + x = x.cdr; + int len2 = compileLengthTree(x.car); /* no-node */ + if (x.cdr != null) newSyntaxException(ERR_INVALID_CONDITION_PATTERN); + x = (ConsAltNode)node.target; + addRelAddr(len + OPSize.JUMP); + compileTree(x.car); /* yes-node */ + addOpcodeRelAddr(OPCode.JUMP, len2); + x = x.cdr; + compileTree(x.car); /* no-node */ + } else { + newInternalException(ERR_PARSER_BUG); + } + break; + default: newInternalException(ERR_PARSER_BUG); break; @@ -1002,13 +1046,19 @@ final class ArrayCompiler extends Compiler { addOpcode(enc.isSingleByte() ? OPCode.WORD_END_SB : OPCode.WORD_END); break; + case AnchorType.KEEP: + addOpcode(OPCode.KEEP); + break; + case AnchorType.PREC_READ: + regex.requireStack = true; addOpcode(OPCode.PUSH_POS); compileTree(node.target); addOpcode(OPCode.POP_POS); break; case AnchorType.PREC_READ_NOT: + regex.requireStack = true; len = compileLengthTree(node.target); addOpcodeRelAddr(OPCode.PUSH_POS_NOT, len + OPSize.FAIL_POS); compileTree(node.target); @@ -1028,6 +1078,7 @@ final class ArrayCompiler extends Compiler { break; case AnchorType.LOOK_BEHIND_NOT: + regex.requireStack = true; len = compileLengthTree(node.target); addOpcodeRelAddr(OPCode.PUSH_LOOK_BEHIND_NOT, len + OPSize.FAIL_LOOK_BEHIND_NOT); if (node.charLength < 0) { @@ -1182,44 +1233,6 @@ final class ArrayCompiler extends Compiler { private void addOpcode(int opcode) { addInt(opcode); - - switch(opcode) { - case OPCode.ANYCHAR_STAR: - case OPCode.ANYCHAR_STAR_SB: - case OPCode.ANYCHAR_ML_STAR: - case OPCode.ANYCHAR_ML_STAR_SB: - case OPCode.ANYCHAR_STAR_PEEK_NEXT: - case OPCode.ANYCHAR_STAR_PEEK_NEXT_SB: - case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT: - case OPCode.ANYCHAR_ML_STAR_PEEK_NEXT_SB: - case OPCode.STATE_CHECK_ANYCHAR_STAR: - case OPCode.STATE_CHECK_ANYCHAR_STAR_SB: - case OPCode.STATE_CHECK_ANYCHAR_ML_STAR: - case OPCode.MEMORY_START_PUSH: - case OPCode.MEMORY_END_PUSH: - case OPCode.MEMORY_END_PUSH_REC: - case OPCode.MEMORY_END_REC: - case OPCode.NULL_CHECK_START: - case OPCode.NULL_CHECK_END_MEMST_PUSH: - case OPCode.PUSH: - case OPCode.STATE_CHECK_PUSH: - case OPCode.STATE_CHECK_PUSH_OR_JUMP: - case OPCode.STATE_CHECK: - case OPCode.PUSH_OR_JUMP_EXACT1: - case OPCode.PUSH_IF_PEEK_NEXT: - case OPCode.REPEAT: - case OPCode.REPEAT_NG: - case OPCode.REPEAT_INC_SG: - case OPCode.REPEAT_INC_NG: - case OPCode.REPEAT_INC_NG_SG: - case OPCode.PUSH_POS: - case OPCode.PUSH_POS_NOT: - case OPCode.PUSH_STOP_BT: - case OPCode.PUSH_LOOK_BEHIND_NOT: - case OPCode.CALL: - case OPCode.RETURN: // it will appear only with CALL though - regex.stackNeeded = true; - } } private void addStateCheckNum(int num) { diff --git a/src/org/joni/ByteCodeMachine.java b/src/org/joni/ByteCodeMachine.java index d5b318c..a6a0cdd 100644 --- a/src/org/joni/ByteCodeMachine.java +++ b/src/org/joni/ByteCodeMachine.java @@ -326,6 +326,7 @@ class ByteCodeMachine extends StackMachine { case OPCode.EXACT1_IC_SB: opExact1ICSb(); break; case OPCode.EXACTN_IC_SB: opExactNICSb(); continue; + case OPCode.CONDITION: opCondition(); continue; case OPCode.FINISH: return finish(); @@ -702,6 +703,14 @@ class ByteCodeMachine extends StackMachine { sprev = s - 1; } + private void opCondition() { + int mem = code[ip++]; + int addr = code[ip++]; + if (mem > regex.numMem || repeatStk[memEndStk + mem] == INVALID_INDEX || repeatStk[memStartStk + mem] == INVALID_INDEX) { + ip += addr; + } + } + private boolean isInBitSet() { int c = bytes[s] & 0xff; return ((code[ip + (c >>> BitSet.ROOM_SHIFT)] & (1 << c)) != 0); diff --git a/src/org/joni/ByteCodePrinter.java b/src/org/joni/ByteCodePrinter.java index 77938da..3ffe9c0 100644 --- a/src/org/joni/ByteCodePrinter.java +++ b/src/org/joni/ByteCodePrinter.java @@ -386,6 +386,14 @@ class ByteCodePrinter { sb.append(":" + scn + ":(" + addr + ")"); break; + case OPCode.CONDITION: + mem = code[bp]; + bp += OPSize.MEMNUM; + addr = code[bp]; + bp += OPSize.RELADDR; + sb.append(":" + mem + ":" + addr); + break; + default: throw new InternalException("undefined code: " + code[--bp]); } diff --git a/src/org/joni/Lexer.java b/src/org/joni/Lexer.java index 0b70271..c1feb68 100644 --- a/src/org/joni/Lexer.java +++ b/src/org/joni/Lexer.java @@ -19,6 +19,7 @@ */ package org.joni; +import static org.joni.Option.isAsciiRange; import static org.joni.Option.isSingleline; import static org.joni.ast.QuantifierNode.isRepeatInfinite; @@ -756,7 +757,7 @@ class Lexer extends ScannerSupport { private void fetchTokenFor_anchor(int subType) { token.type = TokenType.ANCHOR; - token.setAnchor(subType); + token.setAnchorSubtype(subType); } private void fetchTokenFor_xBrace() { @@ -976,13 +977,13 @@ class Lexer extends ScannerSupport { } } - protected final TokenType fetchToken() { + protected final void fetchToken() { // mark(); // out start: while(true) { if (!left()) { token.type = TokenType.EOT; - return token.type; + return; } token.type = TokenType.STRING; @@ -1029,16 +1030,28 @@ class Lexer extends ScannerSupport { if (syntax.opEscWWord()) fetchTokenInCCFor_charType(true, Config.NON_UNICODE_SDW ? CharacterType.W : CharacterType.WORD); break; case 'b': - if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.WORD_BOUND); + if (syntax.opEscBWordBound()) { + fetchTokenFor_anchor(AnchorType.WORD_BOUND); + token.setAnchorASCIIRange(isAsciiRange(env.option)); + } break; case 'B': - if (syntax.opEscBWordBound()) fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND); + if (syntax.opEscBWordBound()) { + fetchTokenFor_anchor(AnchorType.NOT_WORD_BOUND); + token.setAnchorASCIIRange(isAsciiRange(env.option)); + } break; case '<': - if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_BEGIN); + if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { + fetchTokenFor_anchor(AnchorType.WORD_BEGIN); + token.setAnchorASCIIRange(isAsciiRange(env.option)); + } break; case '>': - if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) fetchTokenFor_anchor(AnchorType.WORD_END); + if (Config.USE_WORD_BEGIN_END && syntax.opEscLtGtWordBeginEnd()) { + fetchTokenFor_anchor(AnchorType.WORD_END); + token.setAnchorASCIIRange(isAsciiRange(env.option)); + } break; case 's': if (syntax.opEscSWhiteSpace()) fetchTokenInCCFor_charType(false, Config.NON_UNICODE_SDW ? CharacterType.S : CharacterType.SPACE); @@ -1109,7 +1122,14 @@ class Lexer extends ScannerSupport { case 'P': fetchTokenFor_charProperty(); break; - + case 'R': + if (syntax.op2EscCapitalRLinebreak()) token.type = TokenType.LINEBREAK; + break; + case 'X': + if (syntax.op2EscCapitalXExtendedGraphemeCluster()) token.type = TokenType.EXTENDED_GRAPHEME_CLUSTER; + break; + case 'K': + if (syntax.op2EscCapitalKKeep()) token.type = TokenType.KEEP; default: unfetch(); int num = fetchEscapedValue(); @@ -1219,7 +1239,6 @@ class Lexer extends ScannerSupport { break; } // while - return token.type; } private void greedyCheck() { diff --git a/src/org/joni/Option.java b/src/org/joni/Option.java index 13fbba3..607a6e0 100644 --- a/src/org/joni/Option.java +++ b/src/org/joni/Option.java @@ -23,21 +23,31 @@ public class Option { /* options */ public static final int NONE = 0; - public static final int IGNORECASE = (1<<0); - public static final int EXTEND = (1<<1); - public static final int MULTILINE = (1<<2); - public static final int SINGLELINE = (1<<3); - public static final int FIND_LONGEST = (1<<4); - public static final int FIND_NOT_EMPTY = (1<<5); - public static final int NEGATE_SINGLELINE = (1<<6); - public static final int DONT_CAPTURE_GROUP = (1<<7); - public static final int CAPTURE_GROUP = (1<<8); + public static final int IGNORECASE = (1 << 0); + public static final int EXTEND = (1 << 1); + public static final int MULTILINE = (1 << 2); + public static final int SINGLELINE = (1 << 3); + public static final int FIND_LONGEST = (1 << 4); + public static final int FIND_NOT_EMPTY = (1 << 5); + public static final int NEGATE_SINGLELINE = (1 << 6); + public static final int DONT_CAPTURE_GROUP = (1 << 7); + public static final int CAPTURE_GROUP = (1 << 8); /* options (search time) */ - public static final int NOTBOL = (1<<9); - public static final int NOTEOL = (1<<10); - public static final int POSIX_REGION = (1<<11); - public static final int MAXBIT = (1<<12); /* limit */ + public static final int NOTBOL = (1 << 9); + public static final int NOTEOL = (1 << 10); + public static final int POSIX_REGION = (1 << 11); + + /* options (ctype range) */ + public static final int ASCII_RANGE = (1 << 12); + public static final int POSIX_BRACKET_ALL_RANGE = (1 << 13); + public static final int WORD_BOUND_ALL_RANGE = (1 << 14); + /* options (newline) */ + public static final int NEWLINE_CRLF = (1 << 15); + public static final int NOTBOS = (1 << 16); + public static final int NOTEOS = (1 << 17); + + public static final int MAXBIT = (1 << 18); /* limit */ public static final int DEFAULT = NONE; @@ -112,6 +122,10 @@ public class Option { return (option & POSIX_REGION) != 0; } + public static boolean isAsciiRange(int option) { + return (option & ASCII_RANGE) != 0; + } + /* OP_SET_OPTION is required for these options. ??? */ // public static boolean isDynamic(int option) { // return (option & (MULTILINE | IGNORECASE)) != 0; diff --git a/src/org/joni/Parser.java b/src/org/joni/Parser.java index 419993f..77352b7 100644 --- a/src/org/joni/Parser.java +++ b/src/org/joni/Parser.java @@ -27,10 +27,14 @@ import static org.joni.Option.isIgnoreCase; import org.jcodings.Ptr; import org.jcodings.constants.CharacterType; import org.jcodings.constants.PosixBracket; +import org.jcodings.unicode.UnicodeEncoding; import org.joni.ast.AnchorNode; import org.joni.ast.AnyCharNode; import org.joni.ast.BackRefNode; import org.joni.ast.CClassNode; +import org.joni.ast.CClassNode.CCSTATE; +import org.joni.ast.CClassNode.CCStateArg; +import org.joni.ast.CClassNode.CCVALTYPE; import org.joni.ast.CTypeNode; import org.joni.ast.CallNode; import org.joni.ast.ConsAltNode; @@ -38,10 +42,7 @@ import org.joni.ast.EncloseNode; import org.joni.ast.Node; import org.joni.ast.QuantifierNode; import org.joni.ast.StringNode; -import org.joni.ast.CClassNode.CCStateArg; import org.joni.constants.AnchorType; -import org.joni.constants.CCSTATE; -import org.joni.constants.CCVALTYPE; import org.joni.constants.EncloseType; import org.joni.constants.NodeType; import org.joni.constants.TokenType; @@ -493,6 +494,56 @@ class Parser extends Lexer { } break; + case '(': /* conditional expression: (?(cond)yes), (?(cond)yes|no) */ + if (syntax.op2QMarkLParenCondition()) { + int num = -1; + int name = -1; + fetch(); + if (enc.isDigit(c)) { /* (n) */ + unfetch(); + num = fetchName('(', true); + if (syntax.strictCheckBackref()) { + if (num > env.numMem || env.memNodes == null || env.memNodes[num] == null) newValueException(ERR_INVALID_BACKREF); + } + } else { + if (Config.USE_NAMED_GROUP) { + if (c == '<' || c == '\'') { /* (<name>), ('name') */ + name = p; + num = fetchName(c, false); + int nameEnd = value; + fetch(); + if (c != ')') newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + NameEntry e = env.reg.nameToGroupNumbers(bytes, name, nameEnd); + if (e == null) newValueException(ERR_UNDEFINED_NAME_REFERENCE, name, nameEnd); + if (syntax.strictCheckBackref()) { + if (e.backNum == 1) { + if (e.backRef1 > env.numMem || + env.memNodes == null || + env.memNodes[e.backRef1] == null) newValueException(ERR_INVALID_BACKREF); + } else { + for (int i=0; i<e.backNum; i++) { + if (e.backRefs[i] > env.numMem || + env.memNodes == null || + env.memNodes[e.backRefs[i]] == null) newValueException(ERR_INVALID_BACKREF); + } + } + } + + num = e.backNum == 1 ? e.backRef1 : e.backRefs[0]; /* XXX: use left most named group as Perl */ + } + } else { // USE_NAMED_GROUP + newSyntaxException(ERR_INVALID_CONDITION_PATTERN); + } + } + EncloseNode en = new EncloseNode(EncloseType.CONDITION); + en.regNum = num; + if (name != -1) en.setNameRef(); + node = en; + } else { + newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); + } + break; + // case 'p': #ifdef USE_POSIXLINE_OPTION case '-': case 'i': @@ -534,6 +585,10 @@ class Parser extends Lexer { // option = bsOnOff(option, Option.MULTILINE|Option.SINGLELINE, neg); // break; + case 'a': /* limits \d, \s, \w and POSIX brackets to ASCII range */ + if ((syntax.op2OptionPerl() || syntax.op2OptionRuby()) && !neg) { + option = bsOnOff(option, Option.ASCII_RANGE, neg); + } default: newSyntaxException(ERR_UNDEFINED_GROUP_OPTION); } // switch @@ -580,7 +635,7 @@ class Parser extends Lexer { Node target = parseSubExp(term); if (node.getType() == NodeType.ANCHOR) { - AnchorNode an = (AnchorNode) node; + AnchorNode an = (AnchorNode)node; an.setTarget(target); if (syntax.op2OptionECMAScript() && an.type == AnchorType.PREC_READ_NOT) { env.popPrecReadNotNode(an); @@ -594,6 +649,10 @@ class Parser extends Lexer { } /* Don't move this to previous of parse_subexp() */ env.setMemNode(en.regNum, node); + } else if (en.type == EncloseType.CONDITION) { + if (target.getType() != NodeType.ALT) { /* convert (?(cond)yes) to (?(cond)yes|empty) */ + en.setTarget(ConsAltNode.newAltNode(target, ConsAltNode.newAltNode(StringNode.EMPTY, null))); + } } } returnCode = 0; @@ -675,6 +734,63 @@ class Parser extends Lexer { } else { return parseExpTkByte(group); // goto tk_byte } + case LINEBREAK: + byte[]buflb = new byte[Config.ENC_CODE_TO_MBC_MAXLEN * 2]; + int len1 = enc.codeToMbc(0x0D, buflb, 0); + int len2 = enc.codeToMbc(0x0A, buflb, len1); + StringNode left = new StringNode(buflb, 0, len1 + len2); + left.setRaw(); + /* [\x0A-\x0D] or [\x0A-\x0D\x{85}\x{2028}\x{2029}] */ + CClassNode right = new CClassNode(); + if (enc.minLength() > 1) { + right.addCodeRange(env, 0x0A, 0x0D); + } else { + right.bs.setRange(0x0A, 0x0D); + } + + if (enc.toString().startsWith("UTF")) { + /* UTF-8, UTF-16BE/LE, UTF-32BE/LE */ + right.addCodeRange(env, 0x85, 0x85); + right.addCodeRange(env, 0x2028, 0x2029); + } + /* (?>...) */ + EncloseNode en = new EncloseNode(EncloseType.STOP_BACKTRACK); + en.setTarget(ConsAltNode.newAltNode(left, ConsAltNode.newAltNode(right, null))); + node = en; + break; + + case EXTENDED_GRAPHEME_CLUSTER: + if (Config.USE_UNICODE_PROPERTIES) { + if (enc instanceof UnicodeEncoding) { + int ctype = enc.propertyNameToCType(new byte[]{(byte)'M'}, 0, 1); + if (ctype > 0) { + CClassNode cc1 = new CClassNode(); /* \P{M} */ + cc1.addCType(ctype, false, env, this); + cc1.setNot(); + CClassNode cc2 = new CClassNode(); /* \p{M}* */ + cc1.addCType(ctype, false, env, this); + QuantifierNode qn = new QuantifierNode(0, QuantifierNode.REPEAT_INFINITE, false); + qn.setTarget(cc2); + /* (?>...) */ + EncloseNode en2 = new EncloseNode(EncloseType.STOP_BACKTRACK); + /* \P{M}\p{M}* */ + en2.setTarget(ConsAltNode.newListNode(cc1, ConsAltNode.newListNode(qn, null))); + node = en2; + } + } + } + if (node == null) { + AnyCharNode np1 = new AnyCharNode(); + EncloseNode on = new EncloseNode(bsOnOff(env.option, Option.MULTILINE, false), 0); + on.setTarget(np1); + node = np1; + } + break; + + case KEEP: + node = new AnchorNode(AnchorType.KEEP); + break; + case STRING: return parseExpTkByte(group); // tk_byte: @@ -711,7 +827,7 @@ class Parser extends Lexer { break; case CharacterType.WORD: - node = new CTypeNode(token.getPropCType(), token.getPropNot()); + node = new CTypeNode(token.getPropCType(), token.getPropNot(), false); break; case CharacterType.SPACE: @@ -807,7 +923,7 @@ class Parser extends Lexer { break; case ANCHOR: - node = new AnchorNode(token.getAnchor()); // possible bug in oniguruma + node = new AnchorNode(token.getAnchorSubtype()); break; case OP_REPEAT: diff --git a/src/org/joni/Regex.java b/src/org/joni/Regex.java index 03adf2b..f5eadbd 100644 --- a/src/org/joni/Regex.java +++ b/src/org/joni/Regex.java @@ -27,25 +27,22 @@ import java.util.IllegalFormatConversionException; import java.util.Iterator; import org.jcodings.Encoding; -import org.jcodings.EncodingDB; import org.jcodings.specific.ASCIIEncoding; import org.jcodings.specific.UTF8Encoding; import org.jcodings.util.BytesHash; import org.joni.constants.AnchorType; -import org.joni.constants.RegexState; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; import org.joni.exception.ValueException; -public final class Regex implements RegexState { +public final class Regex { int[] code; /* compiled pattern */ int codeLength; - boolean stackNeeded; + boolean requireStack; Object[]operands; /* e.g. shared CClassNode */ int operandLength; - int state; /* normal, searching, compiling */ // remove int numMem; /* used memory(...) num counted from 1 */ int numRepeat; /* OP_REPEAT/OP_REPEAT_NG id-counter */ int numNullCheck; /* OP_NULL_CHECK_START/END id counter */ diff --git a/src/org/joni/StackMachine.java b/src/org/joni/StackMachine.java index 7b16780..b3d1106 100644 --- a/src/org/joni/StackMachine.java +++ b/src/org/joni/StackMachine.java @@ -43,7 +43,7 @@ abstract class StackMachine extends Matcher implements StackType { protected StackMachine(Regex regex, byte[]bytes, int p , int end) { super(regex, bytes, p, end); - this.stack = regex.stackNeeded ? fetchStack() : null; + this.stack = regex.requireStack ? fetchStack() : null; int n = regex.numRepeat + (regex.numMem << 1); this.repeatStk = n > 0 ? new int[n] : null; @@ -502,6 +502,7 @@ abstract class StackMachine extends Matcher implements StackType { int endp; isNull = 1; while (k < stk) { + e = stack[k++]; if (e.type == MEM_START) { if (e.getMemEnd() == INVALID_INDEX) { isNull = 0; @@ -519,8 +520,6 @@ abstract class StackMachine extends Matcher implements StackType { isNull = -1; /* empty, but position changed */ } } - k++; - e = stack[k]; // !! } break; } diff --git a/src/org/joni/Syntax.java b/src/org/joni/Syntax.java index 4e7b5e7..5ec30f7 100644 --- a/src/org/joni/Syntax.java +++ b/src/org/joni/Syntax.java @@ -189,6 +189,9 @@ public final class Syntax implements SyntaxProperties{ return isOp(OP_ESC_X_BRACE_HEX8); } + public boolean opEscOBraceOctal() { + return isOp(OP_ESC_O_BRACE_OCTAL); + } /** * OP @@ -278,10 +281,55 @@ public final class Syntax implements SyntaxProperties{ return isOp2(OP2_INEFFECTIVE_ESCAPE); } + public boolean op2EscCapitalRLinebreak() { + return isOp2(OP2_ESC_CAPITAL_R_LINEBREAK); + } + + public boolean op2EscCapitalXExtendedGraphemeCluster() { + return isOp2(OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER); + } + + public boolean op2EscVVerticalWhiteSpace() { + return isOp2(OP2_ESC_V_VERTICAL_WHITESPACE); + } + + public boolean op2EscHHorizontalWhiteSpace() { + return isOp2(OP2_ESC_H_HORIZONTAL_WHITESPACE); + } + + public boolean op2EscCapitalKKeep() { + return isOp2(OP2_ESC_CAPITAL_K_KEEP); + } + + public boolean op2EscGBraceBackref() { + return isOp2(OP2_ESC_G_BRACE_BACKREF); + } + + public boolean op2QMarkSubexpCall() { + return isOp2(OP2_QMARK_SUBEXP_CALL); + } + + public boolean op2QMarkBarBranchReset() { + return isOp2(OP2_QMARK_BAR_BRANCH_RESET); + } + + public boolean op2QMarkLParenCondition() { + return isOp2(OP2_QMARK_LPAREN_CONDITION); + } + + public boolean op2QMarkCapitalPNamedGroup() { + return isOp2(OP2_QMARK_CAPITAL_P_NAMED_GROUP); + } + + public boolean op2OptionJava() { + return isOp2(OP2_OPTION_JAVA); + } + public boolean op2OptionECMAScript() { return isOp2(OP2_OPTION_ECMASCRIPT); } + /** * BEHAVIOR * @@ -371,7 +419,11 @@ public final class Syntax implements SyntaxProperties{ OP2_PLUS_POSSESSIVE_REPEAT | OP2_CCLASS_SET_OP | OP2_ESC_CAPITAL_C_BAR_CONTROL | OP2_ESC_CAPITAL_M_BAR_META | OP2_ESC_V_VTAB | - OP2_ESC_H_XDIGIT ), + OP2_ESC_H_XDIGIT | + OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER | + OP2_QMARK_LPAREN_CONDITION | + OP2_ESC_CAPITAL_R_LINEBREAK // | OP2_ESC_CAPITAL_K_KEEP + ), ( GNU_REGEX_BV | ALLOW_INTERVAL_LOW_ABBREV | diff --git a/src/org/joni/Token.java b/src/org/joni/Token.java index 8ad7330..321ad91 100644 --- a/src/org/joni/Token.java +++ b/src/org/joni/Token.java @@ -46,18 +46,19 @@ final class Token { INT1 = code; } - int getAnchor() { + int getAnchorSubtype() { return INT1; } - void setAnchor(int anchor) { + void setAnchorSubtype(int anchor) { INT1 = anchor; } - int getSubtype() { - return INT1; + boolean getAnchorASCIIRange() { + return INT2 == 1; } - void setSubtype(int subtype) { - INT1 = subtype; + + void setAnchorASCIIRange(boolean ascii) { + INT2 = ascii ? 1 : 0; } // repeat union member diff --git a/src/org/joni/ast/AnchorNode.java b/src/org/joni/ast/AnchorNode.java index 988d994..498f1a6 100644 --- a/src/org/joni/ast/AnchorNode.java +++ b/src/org/joni/ast/AnchorNode.java @@ -21,8 +21,9 @@ package org.joni.ast; import org.joni.constants.AnchorType; -public final class AnchorNode extends Node implements AnchorType { - public int type; +public final class AnchorNode extends Node { + public final int type; + public boolean asciiRange; public Node target; public int charLength; @@ -60,28 +61,29 @@ public final class AnchorNode extends Node implements AnchorType { public String toString(int level) { StringBuilder value = new StringBuilder(); value.append("\n type: " + typeToString()); + value.append("\n ascii: " + asciiRange); value.append("\n target: " + pad(target, level + 1)); return value.toString(); } public String typeToString() { StringBuilder type = new StringBuilder(); - if (isType(BEGIN_BUF)) type.append("BEGIN_BUF "); - if (isType(BEGIN_LINE)) type.append("BEGIN_LINE "); - if (isType(BEGIN_POSITION)) type.append("BEGIN_POSITION "); - if (isType(END_BUF)) type.append("END_BUF "); - if (isType(SEMI_END_BUF)) type.append("SEMI_END_BUF "); - if (isType(END_LINE)) type.append("END_LINE "); - if (isType(WORD_BOUND)) type.append("WORD_BOUND "); - if (isType(NOT_WORD_BOUND)) type.append("NOT_WORD_BOUND "); - if (isType(WORD_BEGIN)) type.append("WORD_BEGIN "); - if (isType(WORD_END)) type.append("WORD_END "); - if (isType(PREC_READ)) type.append("PREC_READ "); - if (isType(PREC_READ_NOT)) type.append("PREC_READ_NOT "); - if (isType(LOOK_BEHIND)) type.append("LOOK_BEHIND "); - if (isType(LOOK_BEHIND_NOT)) type.append("LOOK_BEHIND_NOT "); - if (isType(ANYCHAR_STAR)) type.append("ANYCHAR_STAR "); - if (isType(ANYCHAR_STAR_ML)) type.append("ANYCHAR_STAR_ML "); + if (isType(AnchorType.BEGIN_BUF)) type.append("BEGIN_BUF "); + if (isType(AnchorType.BEGIN_LINE)) type.append("BEGIN_LINE "); + if (isType(AnchorType.BEGIN_POSITION)) type.append("BEGIN_POSITION "); + if (isType(AnchorType.END_BUF)) type.append("END_BUF "); + if (isType(AnchorType.SEMI_END_BUF)) type.append("SEMI_END_BUF "); + if (isType(AnchorType.END_LINE)) type.append("END_LINE "); + if (isType(AnchorType.WORD_BOUND)) type.append("WORD_BOUND "); + if (isType(AnchorType.NOT_WORD_BOUND)) type.append("NOT_WORD_BOUND "); + if (isType(AnchorType.WORD_BEGIN)) type.append("WORD_BEGIN "); + if (isType(AnchorType.WORD_END)) type.append("WORD_END "); + if (isType(AnchorType.PREC_READ)) type.append("PREC_READ "); + if (isType(AnchorType.PREC_READ_NOT)) type.append("PREC_READ_NOT "); + if (isType(AnchorType.LOOK_BEHIND)) type.append("LOOK_BEHIND "); + if (isType(AnchorType.LOOK_BEHIND_NOT)) type.append("LOOK_BEHIND_NOT "); + if (isType(AnchorType.ANYCHAR_STAR)) type.append("ANYCHAR_STAR "); + if (isType(AnchorType.ANYCHAR_STAR_ML)) type.append("ANYCHAR_STAR_ML "); return type.toString(); } diff --git a/src/org/joni/ast/CClassNode.java b/src/org/joni/ast/CClassNode.java index 6dd5d7f..f47b100 100644 --- a/src/org/joni/ast/CClassNode.java +++ b/src/org/joni/ast/CClassNode.java @@ -29,8 +29,6 @@ import org.joni.BitSet; import org.joni.CodeRangeBuffer; import org.joni.Config; import org.joni.ScanEnvironment; -import org.joni.constants.CCSTATE; -import org.joni.constants.CCVALTYPE; import org.joni.exception.ErrorMessages; import org.joni.exception.InternalException; import org.joni.exception.SyntaxException; @@ -267,24 +265,24 @@ public final class CClassNode extends Node { if (!not) { for (int i=0; i<n; i++) { - for (int j=mbr[i * 2 + 1]; j<=mbr[i * 2 + 2]; j++) { + for (int j=CR_FROM(mbr, i); j<=CR_TO(mbr, i); j++) { if (j >= sbOut) { if (Config.VANILLA) { - if (j == mbr[i * 2 + 2]) { + if (j == CR_TO(mbr, i)) { i++; - } else if (j > mbr[i * 2 + 1]) { - addCodeRangeToBuf(j, mbr[i * 2 + 2]); + } else if (j > CR_FROM(mbr, i)) { + addCodeRangeToBuf(j, CR_TO(mbr, i)); i++; } } else { - if (j >= mbr[i * 2 + 1]) { - addCodeRangeToBuf(j, mbr[i * 2 + 2]); + if (j >= CR_FROM(mbr, i)) { + addCodeRangeToBuf(j, CR_TO(mbr, i)); i++; } } // !goto sb_end!, remove duplication! for (; i<n; i++) { - addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]); + addCodeRangeToBuf(CR_FROM(mbr, i), CR_TO(mbr, i)); } return; } @@ -293,27 +291,27 @@ public final class CClassNode extends Node { } // !sb_end:! for (int i=0; i<n; i++) { - addCodeRangeToBuf(mbr[2 * i + 1], mbr[2 * i + 2]); + addCodeRangeToBuf(CR_FROM(mbr, i), CR_TO(mbr, i)); } } else { int prev = 0; for (int i=0; i<n; i++) { - for (int j=prev; j < mbr[2 * i + 1]; j++) { + for (int j=prev; j < CR_FROM(mbr, i); j++) { if (j >= sbOut) { // !goto sb_end2!, remove duplication prev = sbOut; for (i=0; i<n; i++) { - if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1); - prev = mbr[i * 2 + 2] + 1; + if (prev < CR_FROM(mbr, i)) addCodeRangeToBuf(prev, CR_FROM(mbr, i) - 1); + prev = CR_TO(mbr, i) + 1; } if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff); return; } bs.set(j); } - prev = mbr[2 * i + 2] + 1; + prev = CR_TO(mbr, i) + 1; } for (int j=prev; j<sbOut; j++) { @@ -323,8 +321,8 @@ public final class CClassNode extends Node { // !sb_end2:! prev = sbOut; for (int i=0; i<n; i++) { - if (prev < mbr[2 * i + 1]) addCodeRangeToBuf(prev, mbr[i * 2 + 1] - 1); - prev = mbr[i * 2 + 2] + 1; + if (prev < CR_FROM(mbr, i)) addCodeRangeToBuf(prev, CR_FROM(mbr, i) - 1); + prev = CR_TO(mbr, i) + 1; } if (prev < 0x7fffffff/*!!!*/) addCodeRangeToBuf(prev, 0x7fffffff); } @@ -425,6 +423,19 @@ public final class CClassNode extends Node { } // switch } + public static enum CCVALTYPE { + SB, + CODE_POINT, + CLASS + } + + public static enum CCSTATE { + VALUE, + RANGE, + COMPLETE, + START + } + public static final class CCStateArg { public int v; public int vs; @@ -567,4 +578,11 @@ public final class CClassNode extends Node { return (flags & FLAG_NCCLASS_SHARE) != 0; } + private static int CR_FROM(int[] range, int i) { + return range[(i * 2) + 1]; + } + + private static int CR_TO(int[] range, int i) { + return range[(i * 2) + 2]; + } } diff --git a/src/org/joni/ast/CTypeNode.java b/src/org/joni/ast/CTypeNode.java index 017ce54..0472e9a 100644 --- a/src/org/joni/ast/CTypeNode.java +++ b/src/org/joni/ast/CTypeNode.java @@ -20,12 +20,14 @@ package org.joni.ast; public final class CTypeNode extends Node { - public int ctype; - public boolean not; + public final int ctype; + public final boolean not; + public final boolean asciiRange; - public CTypeNode(int type, boolean not) { + public CTypeNode(int type, boolean not, boolean asciiRange) { this.ctype= type; this.not = not; + this.asciiRange = asciiRange; } @Override @@ -43,6 +45,7 @@ public final class CTypeNode extends Node { StringBuilder value = new StringBuilder(); value.append("\n ctype: " + ctype); value.append("\n not: " + not); + value.append("\n ascii: " + asciiRange); return value.toString(); } diff --git a/src/org/joni/ast/EncloseNode.java b/src/org/joni/ast/EncloseNode.java index 7c45d14..0ce827e 100644 --- a/src/org/joni/ast/EncloseNode.java +++ b/src/org/joni/ast/EncloseNode.java @@ -101,6 +101,7 @@ public final class EncloseNode extends StateNode implements EncloseType { if (isStopBacktrack()) types.append("STOP_BACKTRACK "); if (isMemory()) types.append("MEMORY "); if (isOption()) types.append("OPTION "); + if (isCondition()) types.append("CONDITION "); return types.toString(); } @@ -113,36 +114,16 @@ public final class EncloseNode extends StateNode implements EncloseType { state &= ~flag; } - public void clearMemory() { - type &= ~MEMORY; - } - - public void setMemory() { - type |= MEMORY; - } - public boolean isMemory() { return (type & MEMORY) != 0; } - public void clearOption() { - type &= ~OPTION; - } - - public void setOption() { - type |= OPTION; - } - public boolean isOption() { return (type & OPTION) != 0; } - public void clearStopBacktrack() { - type &= ~STOP_BACKTRACK; - } - - public void setStopBacktrack() { - type |= STOP_BACKTRACK; + public boolean isCondition() { + return (type & CONDITION) != 0; } public boolean isStopBacktrack() { diff --git a/src/org/joni/constants/EncloseType.java b/src/org/joni/constants/EncloseType.java index 125af0c..13d42b6 100644 --- a/src/org/joni/constants/EncloseType.java +++ b/src/org/joni/constants/EncloseType.java @@ -23,6 +23,7 @@ public interface EncloseType { final int MEMORY = 1<<0; final int OPTION = 1<<1; final int STOP_BACKTRACK = 1<<2; + final int CONDITION = 1<<3; final int ALLOWED_IN_LB = MEMORY; final int ALLOWED_IN_LB_NOT = 0; diff --git a/src/org/joni/constants/OPCode.java b/src/org/joni/constants/OPCode.java index 05d1f8b..06d0feb 100644 --- a/src/org/joni/constants/OPCode.java +++ b/src/org/joni/constants/OPCode.java @@ -63,94 +63,102 @@ public interface OPCode { final int WORD_BEGIN = 33; final int WORD_END = 34; - final int BEGIN_BUF = 35; - final int END_BUF = 36; - final int BEGIN_LINE = 37; - final int END_LINE = 38; - final int SEMI_END_BUF = 39; - final int BEGIN_POSITION = 40; - - final int BACKREF1 = 41; - final int BACKREF2 = 42; - final int BACKREFN = 43; - final int BACKREFN_IC = 44; - final int BACKREF_MULTI = 45; - final int BACKREF_MULTI_IC = 46; - final int BACKREF_WITH_LEVEL = 47; /* \k<xxx+n>, \k<xxx-n> */ - - final int MEMORY_START = 48; - final int MEMORY_START_PUSH = 49; /* push back-tracker to stack */ - final int MEMORY_END_PUSH = 50; /* push back-tracker to stack */ - final int MEMORY_END_PUSH_REC = 51; /* push back-tracker to stack */ - final int MEMORY_END = 52; - final int MEMORY_END_REC = 53; /* push marker to stack */ - - final int FAIL = 54; /* pop stack and move */ - final int JUMP = 55; - final int PUSH = 56; - final int POP = 57; - final int PUSH_OR_JUMP_EXACT1 = 58; /* if match exact then push, else jump. */ - final int PUSH_IF_PEEK_NEXT = 59; /* if match exact then push, else none. */ - - final int REPEAT = 60; /* {n,m} */ - final int REPEAT_NG = 61; /* {n,m}? (non greedy) */ - final int REPEAT_INC = 62; - final int REPEAT_INC_NG = 63; /* non greedy */ - final int REPEAT_INC_SG = 64; /* search and get in stack */ - final int REPEAT_INC_NG_SG = 65; /* search and get in stack (non greedy) */ - - final int NULL_CHECK_START = 66; /* null loop checker start */ - final int NULL_CHECK_END = 67; /* null loop checker end */ - final int NULL_CHECK_END_MEMST = 68; /* null loop checker end (with capture status) */ - final int NULL_CHECK_END_MEMST_PUSH = 69; /* with capture status and push check-end */ - - final int PUSH_POS = 70; /* (?=...) start */ - final int POP_POS = 71; /* (?=...) end */ - final int PUSH_POS_NOT = 72; /* (?!...) start */ - final int FAIL_POS = 73; /* (?!...) end */ - final int PUSH_STOP_BT = 74; /* (?>...) start */ - final int POP_STOP_BT = 75; /* (?>...) end */ - final int LOOK_BEHIND = 76; /* (?<=...) start (no needs end opcode) */ - final int PUSH_LOOK_BEHIND_NOT = 77; /* (?<!...) start */ - final int FAIL_LOOK_BEHIND_NOT = 78; /* (?<!...) end */ - - final int CALL = 79; /* \g<name> */ - final int RETURN = 80; - - final int STATE_CHECK_PUSH = 81; /* combination explosion check and push */ - final int STATE_CHECK_PUSH_OR_JUMP = 82; /* check ok -> push, else jump */ - final int STATE_CHECK = 83; /* check only */ - final int STATE_CHECK_ANYCHAR_STAR = 84; - final int STATE_CHECK_ANYCHAR_ML_STAR = 85; + final int ASCII_WORD = 35; + final int NOT_ASCII_WORD = 36; + final int ASCII_WORD_BOUND = 37; + final int NOT_ASCII_WORD_BOUND = 38; + final int ASCII_WORD_BEGIN = 39; + final int ASCII_WORD_END = 40; + + final int BEGIN_BUF = 41; + final int END_BUF = 42; + final int BEGIN_LINE = 43; + final int END_LINE = 44; + final int SEMI_END_BUF = 45; + final int BEGIN_POSITION = 46; + + final int BACKREF1 = 47; + final int BACKREF2 = 48; + final int BACKREFN = 49; + final int BACKREFN_IC = 50; + final int BACKREF_MULTI = 51; + final int BACKREF_MULTI_IC = 52; + final int BACKREF_WITH_LEVEL = 53; /* \k<xxx+n>, \k<xxx-n> */ + + final int MEMORY_START = 54; + final int MEMORY_START_PUSH = 55; /* push back-tracker to stack */ + final int MEMORY_END_PUSH = 56; /* push back-tracker to stack */ + final int MEMORY_END_PUSH_REC = 57; /* push back-tracker to stack */ + final int MEMORY_END = 58; + final int MEMORY_END_REC = 59; /* push marker to stack */ + + final int KEEP = 60; + final int FAIL = 61; /* pop stack and move */ + final int JUMP = 62; + final int PUSH = 63; + final int POP = 64; + final int PUSH_OR_JUMP_EXACT1 = 65; /* if match exact then push, else jump. */ + final int PUSH_IF_PEEK_NEXT = 66; /* if match exact then push, else none. */ + + final int REPEAT = 67; /* {n,m} */ + final int REPEAT_NG = 68; /* {n,m}? (non greedy) */ + final int REPEAT_INC = 69; + final int REPEAT_INC_NG = 70; /* non greedy */ + final int REPEAT_INC_SG = 71; /* search and get in stack */ + final int REPEAT_INC_NG_SG = 72; /* search and get in stack (non greedy) */ + + final int NULL_CHECK_START = 73; /* null loop checker start */ + final int NULL_CHECK_END = 74; /* null loop checker end */ + final int NULL_CHECK_END_MEMST = 75; /* null loop checker end (with capture status) */ + final int NULL_CHECK_END_MEMST_PUSH = 76; /* with capture status and push check-end */ + + final int PUSH_POS = 77; /* (?=...) start */ + final int POP_POS = 78; /* (?=...) end */ + final int PUSH_POS_NOT = 79; /* (?!...) start */ + final int FAIL_POS = 80; /* (?!...) end */ + final int PUSH_STOP_BT = 81; /* (?>...) start */ + final int POP_STOP_BT = 82; /* (?>...) end */ + final int LOOK_BEHIND = 83; /* (?<=...) start (no needs end opcode) */ + final int PUSH_LOOK_BEHIND_NOT = 84; /* (?<!...) start */ + final int FAIL_LOOK_BEHIND_NOT = 85; /* (?<!...) end */ + + final int CALL = 86; /* \g<name> */ + final int RETURN = 87; + final int CONDITION = 88; + + final int STATE_CHECK_PUSH = 89; /* combination explosion check and push */ + final int STATE_CHECK_PUSH_OR_JUMP = 90; /* check ok -> push, else jump */ + final int STATE_CHECK = 91; /* check only */ + final int STATE_CHECK_ANYCHAR_STAR = 92; + final int STATE_CHECK_ANYCHAR_ML_STAR = 93; /* no need: IS_DYNAMIC_OPTION() == 0 */ - final int SET_OPTION_PUSH = 86; /* set option and push recover option */ - final int SET_OPTION = 87; /* set option */ + final int SET_OPTION_PUSH = 94; /* set option and push recover option */ + final int SET_OPTION = 95; /* set option */ // single byte versions - final int ANYCHAR_SB = 88; /* "." */ - final int ANYCHAR_ML_SB = 89; /* "." multi-line */ - final int ANYCHAR_STAR_SB = 90; /* ".*" */ - final int ANYCHAR_ML_STAR_SB = 91; /* ".*" multi-line */ - final int ANYCHAR_STAR_PEEK_NEXT_SB = 92; - final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 93; - final int STATE_CHECK_ANYCHAR_STAR_SB = 94; - final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 95; - - final int CCLASS_SB = 96; - final int CCLASS_NOT_SB = 97; - final int WORD_SB = 98; - final int NOT_WORD_SB = 99; - final int WORD_BOUND_SB = 100; - final int NOT_WORD_BOUND_SB = 101; - final int WORD_BEGIN_SB = 102; - final int WORD_END_SB = 103; - - final int LOOK_BEHIND_SB = 104; - - final int EXACT1_IC_SB = 105; /* single byte, N = 1, ignore case */ - final int EXACTN_IC_SB = 106; /* single byte, ignore case */ - + final int ANYCHAR_SB = 96; /* "." */ + final int ANYCHAR_ML_SB = 97; /* "." multi-line */ + final int ANYCHAR_STAR_SB = 98; /* ".*" */ + final int ANYCHAR_ML_STAR_SB = 99; /* ".*" multi-line */ + final int ANYCHAR_STAR_PEEK_NEXT_SB = 100; + final int ANYCHAR_ML_STAR_PEEK_NEXT_SB = 101; + final int STATE_CHECK_ANYCHAR_STAR_SB = 102; + final int STATE_CHECK_ANYCHAR_ML_STAR_SB= 103; + + final int CCLASS_SB = 104; + final int CCLASS_NOT_SB = 105; + final int WORD_SB = 106; + final int NOT_WORD_SB = 107; + final int WORD_BOUND_SB = 108; + final int NOT_WORD_BOUND_SB = 109; + final int WORD_BEGIN_SB = 110; + final int WORD_END_SB = 111; + + final int LOOK_BEHIND_SB = 112; + + final int EXACT1_IC_SB = 113; /* single byte, N = 1, ignore case */ + final int EXACTN_IC_SB = 114; /* single byte, ignore case */ public final String OpCodeNames[] = Config.DEBUG_COMPILE ? new String[] { "finish", /*OP_FINISH*/ @@ -188,6 +196,12 @@ public interface OPCode { "not-word-bound", /*OP_NOT_WORD_BOUND*/ "word-begin", /*OP_WORD_BEGIN*/ "word-end", /*OP_WORD_END*/ + "ascii-word", /*OP_ASCII_WORD*/ + "not-ascii-word", /*OP_NOT_ASCII_WORD*/ + "ascii-word-bound", /*OP_ASCII_WORD_BOUND*/ + "not-ascii-word-bound", /*OP_NOT_ASCII_WORD_BOUND*/ + "ascii-word-begin", /*OP_ASCII_WORD_BEGIN*/ + "ascii-word-end", /*OP_ASCII_WORD_END*/ "begin-buf", /*OP_BEGIN_BUF*/ "end-buf", /*OP_END_BUF*/ "begin-line", /*OP_BEGIN_LINE*/ @@ -207,6 +221,7 @@ public interface OPCode { "mem-end-push-rec", /*OP_MEMORY_END_PUSH_REC*/ "mem-end", /*OP_MEMORY_END*/ "mem-end-rec", /*OP_MEMORY_END_REC*/ + "keep", /*OP_KEEP*/ "fail", /*OP_FAIL*/ "jump", /*OP_JUMP*/ "push", /*OP_PUSH*/ @@ -234,6 +249,7 @@ public interface OPCode { "fail-look-behind-not", /*OP_FAIL_LOOK_BEHIND_NOT*/ "call", /*OP_CALL*/ "return", /*OP_RETURN*/ + "condition", /*OP_CONDITION*/ "state-check-push", /*OP_STATE_CHECK_PUSH*/ "state-check-push-or-jump", /*OP_STATE_CHECK_PUSH_OR_JUMP*/ "state-check", /*OP_STATE_CHECK*/ @@ -305,6 +321,12 @@ public interface OPCode { Arguments.NON, /*OP_NOT_WORD_BOUND*/ Arguments.NON, /*OP_WORD_BEGIN*/ Arguments.NON, /*OP_WORD_END*/ + Arguments.NON, /*OP_ASCII_WORD*/ + Arguments.NON, /*OP_NOT_ASCII_WORD*/ + Arguments.NON, /*OP_ASCII_WORD_BOUND*/ + Arguments.NON, /*OP_NOT_ASCII_WORD_BOUND*/ + Arguments.NON, /*OP_ASCII_WORD_BEGIN*/ + Arguments.NON, /*OP_ASCII_WORD_END*/ Arguments.NON, /*OP_BEGIN_BUF*/ Arguments.NON, /*OP_END_BUF*/ Arguments.NON, /*OP_BEGIN_LINE*/ @@ -324,6 +346,7 @@ public interface OPCode { Arguments.MEMNUM, /*OP_MEMORY_END_PUSH_REC*/ Arguments.MEMNUM, /*OP_MEMORY_END*/ Arguments.MEMNUM, /*OP_MEMORY_END_REC*/ + Arguments.NON, /*OP_KEEP*/ Arguments.NON, /*OP_FAIL*/ Arguments.RELADDR, /*OP_JUMP*/ Arguments.RELADDR, /*OP_PUSH*/ @@ -351,6 +374,7 @@ public interface OPCode { Arguments.NON, /*OP_FAIL_LOOK_BEHIND_NOT*/ Arguments.ABSADDR, /*OP_CALL*/ Arguments.NON, /*OP_RETURN*/ + Arguments.SPECIAL, /*OP_CONDITION*/ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH*/ Arguments.SPECIAL, /*OP_STATE_CHECK_PUSH_OR_JUMP*/ Arguments.STATE_CHECK, /*OP_STATE_CHECK*/ diff --git a/src/org/joni/constants/OPSize.java b/src/org/joni/constants/OPSize.java index d5595ad..fa3d3c6 100644 --- a/src/org/joni/constants/OPSize.java +++ b/src/org/joni/constants/OPSize.java @@ -67,6 +67,7 @@ public interface OPSize { final int FAIL_LOOK_BEHIND_NOT = OPCODE; final int CALL = (OPCODE + ABSADDR); final int RETURN = OPCODE; + final int CONDITION = (OPCODE + MEMNUM + RELADDR); // #ifdef USE_COMBINATION_EXPLOSION_CHECK final int STATE_CHECK = (OPCODE + STATE_CHECK_NUM); diff --git a/src/org/joni/constants/RegexState.java b/src/org/joni/constants/RegexState.java deleted file mode 100644 index 72dd3ff..0000000 --- a/src/org/joni/constants/RegexState.java +++ /dev/null @@ -1,28 +0,0 @@ -/* - * Permission is hereby granted, free of charge, to any person obtaining a copy of - * this software and associated documentation files (the "Software"), to deal in - * the Software without restriction, including without limitation the rights to - * use, copy, modify, merge, publish, distribute, sublicense, and/or sell copies - * of the Software, and to permit persons to whom the Software is furnished to do - * so, subject to the following conditions: - * - * The above copyright notice and this permission notice shall be included in all - * copies or substantial portions of the Software. - * - * THE SOFTWARE IS PROVIDED "AS IS", WITHOUT WARRANTY OF ANY KIND, EXPRESS OR - * IMPLIED, INCLUDING BUT NOT LIMITED TO THE WARRANTIES OF MERCHANTABILITY, - * FITNESS FOR A PARTICULAR PURPOSE AND NONINFRINGEMENT. IN NO EVENT SHALL THE - * AUTHORS OR COPYRIGHT HOLDERS BE LIABLE FOR ANY CLAIM, DAMAGES OR OTHER - * LIABILITY, WHETHER IN AN ACTION OF CONTRACT, TORT OR OTHERWISE, ARISING FROM, - * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE - * SOFTWARE. - */ -package org.joni.constants; - -// we dont need this ATM -public interface RegexState { - final int NORMAL = 0; - final int SEARCHING = 1; - final int COMPILING = -1; - final int MODIFY = -2; -} diff --git a/src/org/joni/constants/SyntaxProperties.java b/src/org/joni/constants/SyntaxProperties.java index 075324c..46861f2 100644 --- a/src/org/joni/constants/SyntaxProperties.java +++ b/src/org/joni/constants/SyntaxProperties.java @@ -52,10 +52,11 @@ public interface SyntaxProperties { final int OP_ESC_OCTAL3 = (1<<28); /* \OOO */ final int OP_ESC_X_HEX2 = (1<<29); /* \xHH */ final int OP_ESC_X_BRACE_HEX8 = (1<<30); /* \x{7HHHHHHH} */ + final int OP_ESC_O_BRACE_OCTAL = (1<<31); /* \o{OOO} */ final int OP2_ESC_CAPITAL_Q_QUOTE = (1<<0); /* \Q...\E */ final int OP2_QMARK_GROUP_EFFECT = (1<<1); /* (?...); */ - final int OP2_OPTION_PERL = (1<<2); /* (?imsx);,(?-imsx); */ + final int OP2_OPTION_PERL = (1<<2); /* (?imsxadlu), (?-imsx), (?^imsxalu) */ final int OP2_OPTION_RUBY = (1<<3); /* (?imx);, (?-imx); */ final int OP2_PLUS_POSSESSIVE_REPEAT = (1<<4); /* ?+,*+,++ */ final int OP2_PLUS_POSSESSIVE_INTERVAL = (1<<5); /* {n,m}+ */ @@ -74,7 +75,18 @@ public interface SyntaxProperties { /* final int OP2_CHAR_PROPERTY_PREFIX_IS = (1<<18); */ final int OP2_ESC_H_XDIGIT = (1<<19); /* \h, \H */ final int OP2_INEFFECTIVE_ESCAPE = (1<<20); /* \ */ - final int OP2_OPTION_ECMASCRIPT = (1<<21); /* EcmaScript quirks */ + final int OP2_ESC_CAPITAL_R_LINEBREAK = (1<<21); /* \R as (?>\x0D\x0A|[\x0A-\x0D\x{85}\x{2028}\x{2029}]) */ + final int OP2_ESC_CAPITAL_X_EXTENDED_GRAPHEME_CLUSTER = (1<<22); /* \X as (?:\P{M}\p{M}*) */ + final int OP2_ESC_V_VERTICAL_WHITESPACE = (1<<23); /* \v, \V -- Perl */ + final int OP2_ESC_H_HORIZONTAL_WHITESPACE= (1<<24); /* \h, \H -- Perl */ + final int OP2_ESC_CAPITAL_K_KEEP = (1<<25); /* \K */ + final int OP2_ESC_G_BRACE_BACKREF = (1<<26); /* \g{name}, \g{n} */ + final int OP2_QMARK_SUBEXP_CALL = (1<<27); /* (?&name), (?n), (?R), (?0) */ + final int OP2_QMARK_BAR_BRANCH_RESET = (1<<28); /* (?|...) */ + final int OP2_QMARK_LPAREN_CONDITION = (1<<29); /* (?(cond)yes...|no...) */ + final int OP2_QMARK_CAPITAL_P_NAMED_GROUP= (1<<30); /* (?P<name>...), (?P=name), (?P>name) -- Python/PCRE */ + final int OP2_OPTION_JAVA = (1<<31); /* (?idmsux), (?-idmsux) */ + final int OP2_OPTION_ECMASCRIPT = (1<<32); /* EcmaScript quirks */ /* syntax (behavior); */ final int CONTEXT_INDEP_ANCHORS = (1<<31); /* not implemented */ @@ -88,6 +100,7 @@ public interface SyntaxProperties { final int CAPTURE_ONLY_NAMED_GROUP = (1<<7); /* see doc/RE */ final int ALLOW_MULTIPLEX_DEFINITION_NAME = (1<<8); /* (?<x>);(?<x>); */ final int FIXED_INTERVAL_IS_GREEDY_ONLY = (1<<9); /* a{n}?=(?:a{n});? */ + final int ALLOW_MULTIPLEX_DEFINITION_NAME_CALL = (1<<10); /* (?<x>)(?<x>)(?&x) */ /* syntax (behavior); in char class [...] */ final int NOT_NEWLINE_IN_NEGATIVE_CC = (1<<20); /* [^...] */ diff --git a/src/org/joni/constants/TokenType.java b/src/org/joni/constants/TokenType.java index 59aa094..80f3079 100644 --- a/src/org/joni/constants/TokenType.java +++ b/src/org/joni/constants/TokenType.java @@ -39,6 +39,9 @@ public enum TokenType { CC_OPEN, QUOTE_OPEN, CHAR_PROPERTY, /* \p{...}, \P{...} */ + LINEBREAK, + EXTENDED_GRAPHEME_CLUSTER, + KEEP, /* in cc */ CC_CLOSE, CC_RANGE, diff --git a/src/org/joni/exception/ErrorMessages.java b/src/org/joni/exception/ErrorMessages.java index 683ff62..008ea0b 100644 --- a/src/org/joni/exception/ErrorMessages.java +++ b/src/org/joni/exception/ErrorMessages.java @@ -63,6 +63,7 @@ public interface ErrorMessages extends org.jcodings.exception.ErrorMessages { final String ERR_INVALID_POSIX_BRACKET_TYPE = "invalid POSIX bracket type"; final String ERR_INVALID_LOOK_BEHIND_PATTERN = "invalid pattern in look-behind"; final String ERR_INVALID_REPEAT_RANGE_PATTERN = "invalid repeat range {lower,upper}"; + final String ERR_INVALID_CONDITION_PATTERN = "invalid conditional pattern"; /* values error (syntax error) */ final String ERR_TOO_BIG_NUMBER = "too big number"; diff --git a/test/org/joni/test/TestA.java b/test/org/joni/test/TestA.java index d9403da..de5e3b5 100644 --- a/test/org/joni/test/TestA.java +++ b/test/org/joni/test/TestA.java @@ -500,6 +500,34 @@ public class TestA extends Test { x2s("(?<=\\babc)d", " abcd", 4, 5); x2s("(?<=\\Babc)d", "aabcd", 4, 5); + x2s("([^\\s]+),(.*)+", " xxxx, xxx xxxxxx xxxxxxxxx xxxxxxx, xxxx xxxxx xxxxx ", 1, 62); + x2s(",(.*)+", " xxxx, xxx xxxxxx xxxxxxxxx xxxxxxx, xxxx xxxxx xxxxx ", 5, 62); + + x2s("(?:(a)|(b))(?(1)cd)e", "acde", 0, 4); + ns("(?:(a)|(b))(?(1)cd)e", "ae"); + x2s("(?:(a)|(b))(?(2)cd)e", "ae", 0, 2); + ns("(?:(a)|(b))(?(2)cd)e", "acde"); + x2s("(?:(a)|(b))(?(1)c|d)", "ac", 0, 2); + x2s("(?:(a)|(b))(?(1)c|d)", "bd", 0, 2); + ns("(?:(a)|(b))(?(1)c|d)", "ad"); + ns("(?:(a)|(b))(?(1)c|d)", "bc"); + x2s("(?:(a)|(b))(?:(?(1)cd)e|fg)", "acde", 0, 4); + x2s("(?:(a)|(b))(?:(?(1)cd|x)e|fg)", "bxe", 0, 3); + ns("(?:(a)|(b))(?:(?(2)cd|x)e|fg)", "bxe"); + x2s("(?:(?<x>a)|(?<y>b))(?:(?(<x>)cd|x)e|fg)", "bxe", 0, 3); + ns("(?:(?<x>a)|(?<y>b))(?:(?(<y>)cd|x)e|fg)", "bxe"); + x2s("((?<=a))?(?(1)b|c)", "abc", 1, 2); + x2s("((?<=a))?(?(1)b|c)", "bc", 1, 2); + x2s("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xy", 0, 2); + x2s("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yx", 0, 2); + ns("((?<x>x)|(?<y>y))(?(<x>)y|x)", "xx"); + ns("((?<x>x)|(?<y>y))(?(<x>)y|x)", "yy"); + + x2s("\\R", "\n", 0, 1); + x2s("\\R", "\r", 0, 1); + x2s("\\R{3}", "\r\r\n\n", 0, 4); + + x2s("\\X{5}", "ăăab\n", 0, 5); } public static void main(String[] args) throws Throwable{ diff --git a/src/org/joni/constants/CCVALTYPE.java b/test/org/joni/test/TestAU.java similarity index 62% rename from src/org/joni/constants/CCVALTYPE.java rename to test/org/joni/test/TestAU.java index b2bcb30..0d37a88 100644 --- a/src/org/joni/constants/CCVALTYPE.java +++ b/test/org/joni/test/TestAU.java @@ -17,10 +17,36 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -package org.joni.constants; +package org.joni.test; -public enum CCVALTYPE { - SB, - CODE_POINT, - CLASS +import org.jcodings.Encoding; +import org.jcodings.specific.ASCIIEncoding; +import org.joni.Option; +import org.joni.Syntax; + +public class TestAU extends Test { + + public int option() { + return Option.DEFAULT; + } + + public Encoding encoding() { + return ASCIIEncoding.INSTANCE; + } + + public String testEncoding() { + return "utf-8"; + } + + public Syntax syntax() { + return Syntax.DEFAULT; + } + + public void test() throws InterruptedException { + x2s("\\X{5}", "あいab\n", 0, 5); + } + + public static void main(String[] args) throws Throwable { + new TestU8().run(); + } } diff --git a/test/org/joni/test/TestJoni.java b/test/org/joni/test/TestJoni.java index b7d5744..d7f4fe6 100644 --- a/test/org/joni/test/TestJoni.java +++ b/test/org/joni/test/TestJoni.java @@ -30,6 +30,8 @@ public class TestJoni extends TestCase { private Test testLookBehind; private Test testu8; private Test testInterrupt; + private Test testPerl; + private Test testAsciiViaUtf; protected void setUp() { testa = new TestA(); @@ -39,6 +41,8 @@ public class TestJoni extends TestCase { testu8 = new TestU8(); testLookBehind = new TestLookBehind(); testInterrupt = new TestInterrupt(); + testPerl = new TestPerl(); + testAsciiViaUtf = new TestAU(); } protected void tearDown() { @@ -52,6 +56,7 @@ public class TestJoni extends TestCase { public void testAscii() { testJoniTest(testa); + testJoniTest(testAsciiViaUtf); } public void testEUCJP() { @@ -71,4 +76,9 @@ public class TestJoni extends TestCase { public void testInterrupt() { testJoniTest(testInterrupt); } + + public void testPerl() { + testJoniTest(testPerl); + } + } diff --git a/src/org/joni/constants/CCSTATE.java b/test/org/joni/test/TestPerl.java similarity index 63% rename from src/org/joni/constants/CCSTATE.java rename to test/org/joni/test/TestPerl.java index 669b821..334f334 100644 --- a/src/org/joni/constants/CCSTATE.java +++ b/test/org/joni/test/TestPerl.java @@ -17,11 +17,36 @@ * OUT OF OR IN CONNECTION WITH THE SOFTWARE OR THE USE OR OTHER DEALINGS IN THE * SOFTWARE. */ -package org.joni.constants; +package org.joni.test; -public enum CCSTATE { - VALUE, - RANGE, - COMPLETE, - START +import org.joni.Option; +import org.joni.Syntax; +import org.jcodings.Encoding; +import org.jcodings.specific.ASCIIEncoding; + +public class TestPerl extends Test { + + public int option() { + return Option.DEFAULT; + } + + public Encoding encoding() { + return ASCIIEncoding.INSTANCE; + } + + public String testEncoding() { + return "iso-8859-2"; + } + + public Syntax syntax() { + return Syntax.PerlNG; + } + + public void test() throws InterruptedException { + + } + + public static void main(String[] args) throws Throwable{ + new TestPerl().run(); + } } diff --git a/test/org/joni/test/TestU8.java b/test/org/joni/test/TestU8.java index 2479e74..9723100 100644 --- a/test/org/joni/test/TestU8.java +++ b/test/org/joni/test/TestU8.java @@ -35,7 +35,7 @@ public class TestU8 extends Test { } public String testEncoding() { - return "iso-8859-1"; + return "utf-8"; } public Syntax syntax() { @@ -80,6 +80,12 @@ public class TestU8 extends Test { x2s("(?i:!\\[CDAT)", "![CDAT", 0, 6); x2s("(?i:\\!\\[CDAa)", "\\![CDAa", 1, 7); x2s("(?i:\\!\\[CDAb)", "\\![CDAb", 1, 7); + + x2s("\\R", "\u0085", 0, 2); + x2s("\\R", "\u2028", 0, 3); + x2s("\\R", "\u2029", 0, 3); + + x2s("\\X", "\u306F\u309A\n", 0, 3); } public static void main(String[] args) throws Throwable { -- Alioth's /usr/local/bin/git-commit-notice on /srv/git.debian.org/git/pkg-java/jruby-joni.git _______________________________________________ pkg-java-commits mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/pkg-java-commits

