Author: [EMAIL PROTECTED]
Date: Mon Nov 24 05:21:48 2008
New Revision: 827
Modified:
branches/experimental/regexp2000/src/assembler-re2k.cc
branches/experimental/regexp2000/src/assembler-re2k.h
branches/experimental/regexp2000/src/bytecodes-re2k.h
branches/experimental/regexp2000/src/flag-definitions.h
branches/experimental/regexp2000/src/interpreter-re2k.cc
branches/experimental/regexp2000/src/jsregexp.cc
branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h
branches/experimental/regexp2000/src/regexp-macro-assembler.h
Log:
* Match literals in a case independent way.
* When matching constant length sequences, do case independent
characters (those with no case variants) first, then case dependent
characters (letters), then character classes
Review URL: http://codereview.chromium.org/11352
Modified: branches/experimental/regexp2000/src/assembler-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k.cc (original)
+++ branches/experimental/regexp2000/src/assembler-re2k.cc Mon Nov 24
05:21:48 2008
@@ -196,6 +196,25 @@
EmitOrLink(on_mismatch);
}
+void Re2kAssembler::OrThenCheckNotCharacter(uc16 c,
+ uc16 mask,
+ Label* on_mismatch) {
+ Emit(BC_OR_CHECK_NOT_CHAR);
+ Emit16(c);
+ Emit16(mask);
+ EmitOrLink(on_mismatch);
+}
+
+
+void Re2kAssembler::MinusOrThenCheckNotCharacter(uc16 c,
+ uc16 mask,
+ Label* on_mismatch) {
+ Emit(BC_MINUS_OR_CHECK_NOT_CHAR);
+ Emit16(c);
+ Emit16(mask);
+ EmitOrLink(on_mismatch);
+}
+
void Re2kAssembler::CheckCharacterLT(uc16 limit, Label* on_less) {
Emit(BC_CHECK_LT);
Modified: branches/experimental/regexp2000/src/assembler-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/assembler-re2k.h (original)
+++ branches/experimental/regexp2000/src/assembler-re2k.h Mon Nov 24
05:21:48 2008
@@ -62,6 +62,8 @@
// Checks current char register against a singleton.
void CheckCharacter(uc16 c, Label* on_match);
void CheckNotCharacter(uc16 c, Label* on_mismatch);
+ void OrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);
+ void MinusOrThenCheckNotCharacter(uc16 c, uc16 mask, Label* on_mismatch);
// Used to check current char register against a range.
void CheckCharacterLT(uc16 limit, Label* on_less);
Modified: branches/experimental/regexp2000/src/bytecodes-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/bytecodes-re2k.h (original)
+++ branches/experimental/regexp2000/src/bytecodes-re2k.h Mon Nov 24
05:21:48 2008
@@ -52,15 +52,17 @@
V(LOAD_CURRENT_CHAR, 17, 9) /* load offset32
addr32 */ \
V(CHECK_CHAR, 18, 7) /* check_char uc16
addr32 */ \
V(CHECK_NOT_CHAR, 19, 7) /* check_not_char uc16
addr32 */ \
-V(CHECK_LT, 20, 7) /* check_lt uc16
addr32 */ \
-V(CHECK_GT, 21, 7) /* check_gr uc16
addr32 */ \
-V(CHECK_NOT_BACK_REF, 22, 6) /* check_not_back_ref capture_idx
addr32 */ \
-V(LOOKUP_MAP1, 23, 11) /* l_map1 start16 bit_map_addr32
addr32 */ \
-V(LOOKUP_MAP2, 24, 99) /* l_map2 start16
half_nibble_map_addr32* */ \
-V(LOOKUP_MAP8, 25, 99) /* l_map8 start16 byte_map
addr32* */ \
-V(LOOKUP_HI_MAP8, 26, 99) /* l_himap8 start8 byte_map_addr32
addr32* */ \
-V(CHECK_REGISTER_LT, 27, 8) /* check_reg_lt register_index value16
addr32 */ \
-V(CHECK_REGISTER_GE, 28, 8) /* check_reg_ge register_index value16
addr32 */ \
+V(OR_CHECK_NOT_CHAR, 20, 9) /* or_check_not_char uc16 uc16
addr32 */ \
+V(MINUS_OR_CHECK_NOT_CHAR, 21, 9) /* minus_or_check_not_char uc16 uc16
ad...*/ \
+V(CHECK_LT, 22, 7) /* check_lt uc16
addr32 */ \
+V(CHECK_GT, 23, 7) /* check_gr uc16
addr32 */ \
+V(CHECK_NOT_BACK_REF, 24, 6) /* check_not_back_ref capture_idx
addr32 */ \
+V(LOOKUP_MAP1, 25, 11) /* l_map1 start16 bit_map_addr32
addr32 */ \
+V(LOOKUP_MAP2, 26, 99) /* l_map2 start16
half_nibble_map_addr32* */ \
+V(LOOKUP_MAP8, 27, 99) /* l_map8 start16 byte_map
addr32* */ \
+V(LOOKUP_HI_MAP8, 28, 99) /* l_himap8 start8 byte_map_addr32
addr32* */ \
+V(CHECK_REGISTER_LT, 29, 8) /* check_reg_lt register_index value16
addr32 */ \
+V(CHECK_REGISTER_GE, 30, 8) /* check_reg_ge register_index value16
addr32 */ \
#define DECLARE_BYTECODES(name, code, length) \
static const int BC_##name = code;
Modified: branches/experimental/regexp2000/src/flag-definitions.h
==============================================================================
--- branches/experimental/regexp2000/src/flag-definitions.h (original)
+++ branches/experimental/regexp2000/src/flag-definitions.h Mon Nov 24
05:21:48 2008
@@ -291,6 +291,7 @@
DEFINE_bool(trace_regexps, false, "trace regexp execution")
DEFINE_bool(trace_regexp_bytecodes, false, "trace regexp bytecode
executon")
+DEFINE_bool(attempt_case_independent, false, "attempt to run re2k case
independent")
DEFINE_bool(re2k_native, false, "use native code regexp implementation")
//
Modified: branches/experimental/regexp2000/src/interpreter-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/interpreter-re2k.cc (original)
+++ branches/experimental/regexp2000/src/interpreter-re2k.cc Mon Nov 24
05:21:48 2008
@@ -195,6 +195,25 @@
}
break;
}
+ BYTECODE(OR_CHECK_NOT_CHAR) {
+ int c = Load16(pc + 1);
+ if (c != (current_char | Load16(pc + 3))) {
+ pc = code_base + Load32(pc + 5);
+ } else {
+ pc += BC_OR_CHECK_NOT_CHAR_LENGTH;
+ }
+ break;
+ }
+ BYTECODE(MINUS_OR_CHECK_NOT_CHAR) {
+ int c = Load16(pc + 1);
+ int m = Load16(pc + 3);
+ if (c != ((current_char - m) | m)) {
+ pc = code_base + Load32(pc + 5);
+ } else {
+ pc += BC_MINUS_OR_CHECK_NOT_CHAR_LENGTH;
+ }
+ break;
+ }
BYTECODE(CHECK_LT) {
int limit = Load16(pc + 1);
if (current_char < limit) {
Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc Mon Nov 24 05:21:48
2008
@@ -875,14 +875,13 @@
class RegExpCompiler {
public:
- explicit RegExpCompiler(int capture_count);
+ RegExpCompiler(int capture_count, bool ignore_case);
int AllocateRegister() { return next_register_++; }
Handle<FixedArray> Assemble(RegExpMacroAssembler* assembler,
RegExpNode* start,
- int capture_count,
- bool case_independent);
+ int capture_count);
inline void AddWork(RegExpNode* node) { work_list_->Add(node); }
@@ -899,6 +898,8 @@
inline void IncrementRecursionDepth() { recursion_depth_++; }
inline void DecrementRecursionDepth() { recursion_depth_--; }
+ inline bool is_case_independent() { return is_case_independent_; }
+
private:
EndNode* accept_;
EndNode* backtrack_;
@@ -906,15 +907,17 @@
List<RegExpNode*>* work_list_;
int recursion_depth_;
RegExpMacroAssembler* macro_assembler_;
+ bool is_case_independent_;
};
// Attempts to compile the regexp using a Regexp2000 code generator.
Returns
// a fixed array or a null handle depending on whether it succeeded.
-RegExpCompiler::RegExpCompiler(int capture_count)
+RegExpCompiler::RegExpCompiler(int capture_count, bool ignore_case)
: next_register_(2 * (capture_count + 1)),
work_list_(NULL),
- recursion_depth_(0) {
+ recursion_depth_(0),
+ is_case_independent_(ignore_case) {
accept_ = new EndNode(EndNode::ACCEPT);
backtrack_ = new EndNode(EndNode::BACKTRACK);
}
@@ -923,9 +926,10 @@
Handle<FixedArray> RegExpCompiler::Assemble(
RegExpMacroAssembler* macro_assembler,
RegExpNode* start,
- int capture_count,
- bool case_independent) {
- if (case_independent) return Handle<FixedArray>::null();
+ int capture_count) {
+ if (!FLAG_attempt_case_independent && is_case_independent_) {
+ return Handle<FixedArray>::null();
+ }
macro_assembler_ = macro_assembler;
List <RegExpNode*> work_list(0);
work_list_ = &work_list;
@@ -1110,107 +1114,231 @@
}
-bool TextNode::Emit(RegExpCompiler* compiler) {
- RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
- Bind(macro_assembler);
- int element_count = elms_->length();
- int cp_offset = 0;
- for (int i = 0; i < element_count; i++) {
- TextElement elm = (*elms_)[i];
- switch (elm.type) {
- case TextElement::ATOM: {
- Vector<const uc16> quarks = elm.data.u_atom->data();
- macro_assembler->CheckCharacters(quarks,
- cp_offset,
- on_failure_->label());
- cp_offset += quarks.length();
+static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
+static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
+
+
+static inline void EmitAtomNonLetters(
+ RegExpMacroAssembler* macro_assembler,
+ TextElement elm,
+ Vector<const uc16> quarks,
+ Label* on_failure,
+ int cp_offset) {
+ unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+ for (int i = quarks.length() - 1; i >= 0; i--) {
+ uc16 c = quarks[i];
+ int length = uncanonicalize.get(c, '\0', chars);
+ if (length <= 1) {
+ macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
+ macro_assembler->CheckNotCharacter(c, on_failure);
+ }
+ }
+}
+
+
+static bool ShortCutEmitCharacterPair(RegExpMacroAssembler*
macro_assembler,
+ uc16 c1,
+ uc16 c2,
+ Label* on_failure) {
+ uc16 exor = c1 ^ c2;
+ // Check whether exor has only one bit set.
+ if (((exor - 1) & exor) == 0) {
+ // If c1 and c2 differ only by one bit.
+ // Ecma262UnCanonicalize always gives the highest number last.
+ ASSERT(c2 > c1);
+ macro_assembler->CheckNotCharacterAfterOr(c2, exor, on_failure);
+ return true;
+ } else {
+ ASSERT(c2 > c1);
+ uc16 diff = c2 - c1;
+ if (((diff - 1) & diff) == 0 && c1 >= diff) {
+ // If the characters differ by 2^n but don't differ by one bit then
+ // subtract the difference from the found character, then do the or
+ // trick. We avoid the theoretical case where negative numbers are
+ // involved in order to simplify code generation.
+ macro_assembler->CheckNotCharacterAfterMinusOr(c2 - diff,
+ diff,
+ on_failure);
+ return true;
+ }
+ }
+ return false;
+}
+
+
+static inline void EmitAtomLetters(
+ RegExpMacroAssembler* macro_assembler,
+ TextElement elm,
+ Vector<const uc16> quarks,
+ Label* on_failure,
+ int cp_offset) {
+ unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
+ for (int i = quarks.length() - 1; i >= 0; i--) {
+ uc16 c = quarks[i];
+ int length = uncanonicalize.get(c, '\0', chars);
+ if (length <= 1) continue;
+ macro_assembler->LoadCurrentCharacter(cp_offset + i, on_failure);
+ Label ok;
+ ASSERT(unibrow::Ecma262UnCanonicalize::kMaxWidth == 4);
+ switch (length) {
+ case 2: {
+ if (ShortCutEmitCharacterPair(macro_assembler,
+ chars[0],
+ chars[1],
+ on_failure)) {
+ ok.Unuse();
+ } else {
+ macro_assembler->CheckCharacter(chars[0], &ok);
+ macro_assembler->CheckNotCharacter(chars[1], on_failure);
+ macro_assembler->Bind(&ok);
+ }
break;
}
- case TextElement::CHAR_CLASS: {
- RegExpCharacterClass* cc = elm.data.u_char_class;
- macro_assembler->LoadCurrentCharacter(cp_offset,
on_failure_->label());
- cp_offset++;
+ case 4:
+ macro_assembler->CheckCharacter(chars[3], &ok);
+ // Fall through!
+ case 3:
+ macro_assembler->CheckCharacter(chars[0], &ok);
+ macro_assembler->CheckCharacter(chars[1], &ok);
+ macro_assembler->CheckNotCharacter(chars[2], on_failure);
+ macro_assembler->Bind(&ok);
+ break;
+ default:
+ UNREACHABLE();
+ break;
+ }
+ }
+}
- ZoneList<CharacterRange>* ranges = cc->ranges();
- Label success;
+static void EmitCharClass(RegExpMacroAssembler* macro_assembler,
+ RegExpCharacterClass* cc,
+ int cp_offset,
+ Label* on_failure) {
+ macro_assembler->LoadCurrentCharacter(cp_offset, on_failure);
+ cp_offset++;
- Label *char_is_in_class =
- cc->is_negated() ? on_failure_->label() : &success;
+ ZoneList<CharacterRange>* ranges = cc->ranges();
- int range_count = ranges->length();
+ Label success;
- if (range_count == 0) {
- if (!cc->is_negated()) {
- on_failure()->GoTo(compiler);
- }
- break;
- }
+ Label *char_is_in_class =
+ cc->is_negated() ? on_failure : &success;
- for (int i = 0; i < range_count - 1; i++) {
- CharacterRange& range = (*ranges)[i];
- Label next_range;
- uc16 from = range.from();
- uc16 to = range.to();
- if (to == from) {
- macro_assembler->CheckCharacter(to, char_is_in_class);
- } else {
- if (from != 0) {
- macro_assembler->CheckCharacterLT(from, &next_range);
- }
- if (to != 0xffff) {
- macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
- } else {
- macro_assembler->GoTo(char_is_in_class);
- }
- }
- macro_assembler->Bind(&next_range);
- }
+ int range_count = ranges->length();
- if (range_count != 0) {
- CharacterRange& range = (*ranges)[range_count - 1];
- uc16 from = range.from();
- uc16 to = range.to();
-
- if (to == from) {
- if (cc->is_negated()) {
- macro_assembler->CheckCharacter(to, on_failure_->label());
- } else {
- macro_assembler->CheckNotCharacter(to, on_failure_->label());
- }
- } else {
- if (from != 0) {
- if (!cc->is_negated()) {
- macro_assembler->CheckCharacterLT(from,
on_failure_->label());
- } else {
- macro_assembler->CheckCharacterLT(from, &success);
- }
- }
- if (to != 0xffff) {
- if (!cc->is_negated()) {
- macro_assembler->CheckCharacterGT(to,
on_failure_->label());
- } else {
- macro_assembler->CheckCharacterLT(to + 1,
on_failure_->label());
- }
- } else {
- if (cc->is_negated()) {
- macro_assembler->GoTo(on_failure_->label());
- }
- }
- }
- } else if (cc->is_negated()) {
- macro_assembler->GoTo(on_failure_->label());
- }
+ if (range_count == 0) {
+ if (!cc->is_negated()) {
+ macro_assembler->GoTo(on_failure);
+ }
+ return;
+ }
- macro_assembler->Bind(&success);
+ for (int i = 0; i < range_count - 1; i++) {
+ CharacterRange& range = ranges->at(i);
+ Label next_range;
+ uc16 from = range.from();
+ uc16 to = range.to();
+ if (to == from) {
+ macro_assembler->CheckCharacter(to, char_is_in_class);
+ } else {
+ if (from != 0) {
+ macro_assembler->CheckCharacterLT(from, &next_range);
+ }
+ if (to != 0xffff) {
+ macro_assembler->CheckCharacterLT(to + 1, char_is_in_class);
+ } else {
+ macro_assembler->GoTo(char_is_in_class);
+ }
+ }
+ macro_assembler->Bind(&next_range);
+ }
- break;
+ CharacterRange& range = ranges->at(range_count - 1);
+ uc16 from = range.from();
+ uc16 to = range.to();
+
+ if (to == from) {
+ if (cc->is_negated()) {
+ macro_assembler->CheckCharacter(to, on_failure);
+ } else {
+ macro_assembler->CheckNotCharacter(to, on_failure);
+ }
+ } else {
+ if (from != 0) {
+ if (!cc->is_negated()) {
+ macro_assembler->CheckCharacterLT(from, on_failure);
+ } else {
+ macro_assembler->CheckCharacterLT(from, &success);
}
- default:
- UNREACHABLE();
- return false;
+ }
+ if (to != 0xffff) {
+ if (!cc->is_negated()) {
+ macro_assembler->CheckCharacterGT(to, on_failure);
+ } else {
+ macro_assembler->CheckCharacterLT(to + 1, on_failure);
+ }
+ } else {
+ if (cc->is_negated()) {
+ macro_assembler->GoTo(on_failure);
+ }
+ }
+ }
+ macro_assembler->Bind(&success);
+}
+
+
+
+bool TextNode::Emit(RegExpCompiler* compiler) {
+ RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
+ Bind(macro_assembler);
+ int element_count = elms_->length();
+ int cp_offset = 0;
+ // First, handle straight character matches.
+ for (int i = 0; i < element_count; i++) {
+ TextElement elm = elms_->at(i);
+ if (elm.type == TextElement::ATOM) {
+ Vector<const uc16> quarks = elm.data.u_atom->data();
+ if (!compiler->is_case_independent()) {
+ macro_assembler->CheckCharacters(quarks,
+ cp_offset,
+ on_failure_->label());
+ } else {
+ EmitAtomNonLetters(macro_assembler, elm, quarks,
on_failure_->label(), cp_offset);
+ }
+ cp_offset += quarks.length();
+ } else {
+ ASSERT_EQ(elm.type, TextElement::CHAR_CLASS);
+ cp_offset++;
}
}
+ // Second, handle case independent letter matches if any.
+ if (compiler->is_case_independent()) {
+ cp_offset = 0;
+ for (int i = 0; i < element_count; i++) {
+ TextElement elm = elms_->at(i);
+ if (elm.type == TextElement::ATOM) {
+ Vector<const uc16> quarks = elm.data.u_atom->data();
+ EmitAtomLetters(macro_assembler, elm, quarks,
on_failure_->label(), cp_offset);
+ cp_offset += quarks.length();
+ } else {
+ cp_offset++;
+ }
+ }
+ }
+ // If the fast character matches passed then do the character classes.
+ cp_offset = 0;
+ for (int i = 0; i < element_count; i++) {
+ TextElement elm = elms_->at(i);
+ if (elm.type == TextElement::CHAR_CLASS) {
+ RegExpCharacterClass* cc = elm.data.u_char_class;
+ EmitCharClass(macro_assembler, cc, cp_offset, on_failure_->label());
+ cp_offset ++;
+ } else {
+ cp_offset += elm.data.u_atom->data().length();
+ }
+ }
+
compiler->AddWork(on_failure_);
macro_assembler->AdvanceCurrentPosition(cp_offset);
return on_success()->GoTo(compiler);
@@ -1225,14 +1353,14 @@
// is to use the Dispatch table to try only the relevant ones.
int i;
for (i = 0; i < choice_count - 1; i++) {
- GuardedAlternative alternative = (*alternatives_)[i];
+ GuardedAlternative alternative = alternatives_->at(i);
Label after;
Label after_no_pop_cp;
ZoneList<Guard*>* guards = alternative.guards();
if (guards != NULL) {
int guard_count = guards->length();
for (int j = 0; j < guard_count; j++) {
- GenerateGuard(macro_assembler, (*guards)[j], &after_no_pop_cp);
+ GenerateGuard(macro_assembler, guards->at(j), &after_no_pop_cp);
}
}
macro_assembler->PushCurrentPosition();
@@ -1246,12 +1374,12 @@
macro_assembler->PopCurrentPosition();
macro_assembler->Bind(&after_no_pop_cp);
}
- GuardedAlternative alternative = (*alternatives_)[i];
+ GuardedAlternative alternative = alternatives_->at(i);
ZoneList<Guard*>* guards = alternative.guards();
if (guards != NULL) {
int guard_count = guards->length();
for (int j = 0; j < guard_count; j++) {
- GenerateGuard(macro_assembler, (*guards)[j], on_failure_->label());
+ GenerateGuard(macro_assembler, guards->at(j), on_failure_->label());
}
}
if (!on_failure_->IsBacktrack()) {
@@ -1932,10 +2060,6 @@
}
-static unibrow::Mapping<unibrow::Ecma262UnCanonicalize> uncanonicalize;
-static unibrow::Mapping<unibrow::CanonicalizationRange> canonrange;
-
-
void CharacterRange::AddCaseEquivalents(ZoneList<CharacterRange>* ranges) {
unibrow::uchar chars[unibrow::Ecma262UnCanonicalize::kMaxWidth];
if (IsSingleton()) {
@@ -2412,7 +2536,7 @@
Handle<FixedArray> RegExpEngine::Compile(RegExpParseResult* input,
RegExpNode** node_return,
bool ignore_case) {
- RegExpCompiler compiler(input->capture_count);
+ RegExpCompiler compiler(input->capture_count, ignore_case);
// Wrap the body of the regexp in capture #0.
RegExpNode* captured_body = RegExpCapture::ToNode(input->tree,
0,
@@ -2445,8 +2569,7 @@
ignore_case);
return compiler.Assemble(&macro_assembler,
node,
- input->capture_count,
- ignore_case);
+ input->capture_count);
}
#endif
byte codes[1024];
@@ -2454,8 +2577,7 @@
RegExpMacroAssemblerRe2k macro_assembler(&assembler);
return compiler.Assemble(&macro_assembler,
node,
- input->capture_count,
- ignore_case);
+ input->capture_count);
}
Modified:
branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.cc Mon
Nov 24 05:21:48 2008
@@ -245,6 +245,27 @@
}
+void RegExpMacroAssemblerIA32::CheckNotCharacterAfterOr(uc16 c,
+ uc16 mask,
+ Label*
on_not_equal) {
+ __ mov(eax, Operand(edx));
+ __ or_(eax, mask);
+ __ cmp(eax, c);
+ BranchOrBacktrack(not_equal, on_not_equal);
+}
+
+
+void RegExpMacroAssemblerIA32::CheckNotCharacterAfterMinusOr(
+ uc16 c,
+ uc16 mask,
+ Label* on_not_equal) {
+ __ lea(eax, Operand(edx, -mask));
+ __ or_(eax, mask);
+ __ cmp(eax, c);
+ BranchOrBacktrack(not_equal, on_not_equal);
+}
+
+
void RegExpMacroAssemblerIA32::DispatchHalfNibbleMap(
uc16 start,
Label* half_nibble_map,
Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-ia32.h Mon
Nov 24 05:21:48 2008
@@ -54,6 +54,10 @@
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
+ virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label*
on_not_equal);
+ virtual void CheckNotCharacterAfterMinusOr(uc16 c,
+ uc16 mask,
+ Label* on_not_equal);
virtual void DispatchByteMap(uc16 start,
Label* byte_map,
const Vector<Label*>& destinations);
Modified:
branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.cc Mon
Nov 24 05:21:48 2008
@@ -174,6 +174,21 @@
}
+void RegExpMacroAssemblerRe2k::CheckNotCharacterAfterOr(uc16 c,
+ uc16 mask,
+ Label*
on_not_equal) {
+ assembler_->OrThenCheckNotCharacter(c, mask, on_not_equal);
+}
+
+
+void RegExpMacroAssemblerRe2k::CheckNotCharacterAfterMinusOr(
+ uc16 c,
+ uc16 mask,
+ Label* on_not_equal) {
+ assembler_->MinusOrThenCheckNotCharacter(c, mask, on_not_equal);
+}
+
+
void RegExpMacroAssemblerRe2k::CheckNotBackReference(int start_reg,
Label* on_not_equal) {
assembler_->CheckNotBackReference(start_reg, on_not_equal);
Modified: branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler-re2k.h Mon
Nov 24 05:21:48 2008
@@ -60,6 +60,10 @@
virtual void CheckCharacterGT(uc16 limit, Label* on_greater);
virtual void CheckCharacter(uc16 c, Label* on_equal);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
+ virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label*
on_not_equal);
+ virtual void CheckNotCharacterAfterMinusOr(uc16 c,
+ uc16 mask,
+ Label* on_not_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Modified: branches/experimental/regexp2000/src/regexp-macro-assembler.h
==============================================================================
--- branches/experimental/regexp2000/src/regexp-macro-assembler.h
(original)
+++ branches/experimental/regexp2000/src/regexp-macro-assembler.h Mon Nov
24 05:21:48 2008
@@ -61,6 +61,10 @@
virtual void CheckCharacter(uc16 c, Label* on_equal) = 0;
virtual void CheckCharacterGT(uc16 limit, Label* on_greater) = 0;
virtual void CheckCharacterLT(uc16 limit, Label* on_less) = 0;
+ // Check the current character for a match with a literal string. If we
+ // fail to match then goto the on_failure label. End of input always
+ // matches. If the label is NULL then we should pop a backtrack address
off
+ // the stack and go to that.
virtual void CheckCharacters(
Vector<const uc16> str,
int cp_offset,
@@ -72,11 +76,21 @@
int register_index,
Label* on_equal) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) =
0;
- // Check the current character for a match with a literal string. If we
+ // Check the current character for a match with a literal character. If
we
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address
off
// the stack and go to that.
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal) = 0;
+ // Bitwise or the current character with the given constant and then
+ // check for a match with c.
+ virtual void CheckNotCharacterAfterOr(uc16 c,
+ uc16 or_with,
+ Label* on_not_equal) = 0;
+ // Subtract a constant from the current character, then or with the given
+ // constant and then check for a match with c.
+ virtual void CheckNotCharacterAfterMinusOr(uc16 c,
+ uc16 minus_then_or_with,
+ Label* on_not_equal) = 0;
// Dispatch after looking the current character up in a byte map. The
// destinations vector has up to 256 labels.
virtual void DispatchByteMap(
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---