Author: [EMAIL PROTECTED]
Date: Wed Nov 26 23:27:08 2008
New Revision: 853
Modified:
branches/bleeding_edge/src/flag-definitions.h
branches/bleeding_edge/src/jsregexp.cc
branches/bleeding_edge/src/jsregexp.h
branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
branches/bleeding_edge/src/regexp-macro-assembler-ia32.h
branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc
branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h
branches/bleeding_edge/src/regexp-macro-assembler.h
Log:
* Complete case independent support in Irregexp.
Review URL: http://codereview.chromium.org/12473
Modified: branches/bleeding_edge/src/flag-definitions.h
==============================================================================
--- branches/bleeding_edge/src/flag-definitions.h (original)
+++ branches/bleeding_edge/src/flag-definitions.h Wed Nov 26 23:27:08 2008
@@ -203,8 +203,8 @@
DEFINE_bool(irregexp, false, "new regular expression code")
DEFINE_bool(trace_regexps, false, "trace Irregexp execution")
DEFINE_bool(trace_regexp_bytecodes, false, "trace Irregexp bytecode execution")
-DEFINE_bool(attempt_case_independent, false, "attempt to run Irregexp case independent")
DEFINE_bool(irregexp_native, false, "use native code Irregexp implementation (IA32 only)")
+DEFINE_bool(disable_jscre, false, "abort if JSCRE is used. Only useful with --irregexp")
// Testing flags test/cctest/test-{flags,api,serialization}.cc
DEFINE_bool(testing_bool_flag, true, "testing_bool_flag")
Modified: branches/bleeding_edge/src/jsregexp.cc
==============================================================================
--- branches/bleeding_edge/src/jsregexp.cc (original)
+++ branches/bleeding_edge/src/jsregexp.cc Wed Nov 26 23:27:08 2008
@@ -243,6 +243,9 @@
&node,
flags.is_ignore_case());
if (irregexp_data.is_null()) {
+ if (FLAG_disable_jscre) {
+ UNIMPLEMENTED();
+ }
result = JscrePrepare(re, pattern, flags);
} else {
result = IrregexpPrepare(re, pattern, flags, irregexp_data);
@@ -267,6 +270,9 @@
Handle<Object> index) {
switch (regexp->TypeTag()) {
case JSRegExp::JSCRE:
+ if (FLAG_disable_jscre) {
+ UNIMPLEMENTED();
+ }
return JscreExec(regexp, subject, index);
case JSRegExp::ATOM:
return AtomExec(regexp, subject, index);
@@ -283,6 +289,9 @@
Handle<String> subject) {
switch (regexp->TypeTag()) {
case JSRegExp::JSCRE:
+ if (FLAG_disable_jscre) {
+ UNIMPLEMENTED();
+ }
return JscreExecGlobal(regexp, subject);
case JSRegExp::ATOM:
return AtomExecGlobal(regexp, subject);
@@ -906,7 +915,7 @@
inline void IncrementRecursionDepth() { recursion_depth_++; }
inline void DecrementRecursionDepth() { recursion_depth_--; }
- inline bool is_case_independent() { return is_case_independent_; }
+ inline bool ignore_case() { return ignore_case_; }
private:
EndNode* accept_;
@@ -915,7 +924,7 @@
List<RegExpNode*>* work_list_;
int recursion_depth_;
RegExpMacroAssembler* macro_assembler_;
- bool is_case_independent_;
+ bool ignore_case_;
};
@@ -925,7 +934,7 @@
: next_register_(2 * (capture_count + 1)),
work_list_(NULL),
recursion_depth_(0),
- is_case_independent_(ignore_case) {
+ ignore_case_(ignore_case) {
accept_ = new EndNode(EndNode::ACCEPT);
backtrack_ = new EndNode(EndNode::BACKTRACK);
}
@@ -935,9 +944,6 @@
RegExpMacroAssembler* macro_assembler,
RegExpNode* start,
int capture_count) {
- if (!FLAG_attempt_case_independent && is_case_independent_) {
- return Handle<FixedArray>::null();
- }
macro_assembler_ = macro_assembler;
List <RegExpNode*> work_list(0);
work_list_ = &work_list;
@@ -1306,7 +1312,7 @@
TextElement elm = elms_->at(i);
if (elm.type == TextElement::ATOM) {
Vector<const uc16> quarks = elm.data.u_atom->data();
- if (compiler->is_case_independent()) {
+ if (compiler->ignore_case()) {
EmitAtomNonLetters(macro_assembler,
elm,
quarks,
@@ -1324,7 +1330,7 @@
}
}
// Second, handle case independent letter matches if any.
- if (compiler->is_case_independent()) {
+ if (compiler->ignore_case()) {
cp_offset = 0;
for (int i = 0; i < element_count; i++) {
TextElement elm = elms_->at(i);
@@ -1360,6 +1366,22 @@
}
+void TextNode::MakeCaseIndependent() {  // Widen char classes for ignore-case.
+  int element_count = elms_->length();
+  for (int i = 0; i < element_count; i++) {
+    TextElement elm = elms_->at(i);
+    if (elm.type == TextElement::CHAR_CLASS) {  // Other element types: no-op.
+      RegExpCharacterClass* cc = elm.data.u_char_class;
+      ZoneList<CharacterRange>* ranges = cc->ranges();
+      int range_count = ranges->length();  // Bound is fixed before the calls.
+      for (int j = 0; j < range_count; j++) {  // j: must not shadow outer i.
+        ranges->at(j).AddCaseEquivalents(ranges);  // Callee gets the same list.
+      }
+    }
+  }
+}
+
+
bool ChoiceNode::Emit(RegExpCompiler* compiler) {
int choice_count = alternatives_->length();
RegExpMacroAssembler* macro_assembler = compiler->macro_assembler();
@@ -1477,9 +1499,8 @@
macro->IfRegisterLT(start_reg_, 0, on_success()->label());
macro->IfRegisterLT(end_reg_, 0, on_success()->label());
ASSERT_EQ(start_reg_ + 1, end_reg_);
- if (compiler->is_case_independent()) {
- macro->CheckNotBackReferenceCaseIndependent(start_reg_,
- on_failure_->label());
+ if (compiler->ignore_case()) {
+ macro->CheckNotBackReferenceIgnoreCase(start_reg_, on_failure_->label());
} else {
macro->CheckNotBackReference(start_reg_, on_failure_->label());
}
@@ -2429,6 +2450,9 @@
void Analysis::VisitText(TextNode* that) {
+ if (ignore_case_) {  // Flag supplied to the Analysis constructor.
+ that->MakeCaseIndependent();  // Done before the successors are analyzed.
+ }
EnsureAnalyzed(that->on_success());
EnsureAnalyzed(that->on_failure());
}
@@ -2604,7 +2628,7 @@
captured_body,
compiler.backtrack());
if (node_return != NULL) *node_return = node;
- Analysis analysis;
+ Analysis analysis(ignore_case);
analysis.EnsureAnalyzed(node);
if (!FLAG_irregexp) {
Modified: branches/bleeding_edge/src/jsregexp.h
==============================================================================
--- branches/bleeding_edge/src/jsregexp.h (original)
+++ branches/bleeding_edge/src/jsregexp.h Wed Nov 26 23:27:08 2008
@@ -596,6 +596,7 @@
RegExpNode* on_failure() { return on_failure_; }
virtual bool Emit(RegExpCompiler* compiler);
ZoneList<TextElement>* elements() { return elms_; }
+ void MakeCaseIndependent();
private:
RegExpNode* on_failure_;
ZoneList<TextElement>* elms_;
@@ -741,12 +742,19 @@
class Analysis: public NodeVisitor {
public:
+ explicit Analysis(bool ignore_case)
+ : ignore_case_(ignore_case) { }  // Only records the flag.
void EnsureAnalyzed(RegExpNode* node);
#define DECLARE_VISIT(Type) \
virtual void Visit##Type(Type##Node* that);
FOR_EACH_NODE_TYPE(DECLARE_VISIT)
#undef DECLARE_VISIT
+
+ private:
+ bool ignore_case_;  // When true, VisitText calls MakeCaseIndependent().
+
+ DISALLOW_IMPLICIT_CONSTRUCTORS(Analysis);
};
Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.cc Wed Nov 26 23:27:08 2008
@@ -212,7 +212,7 @@
}
-void RegExpMacroAssemblerIA32::CheckNotBackReferenceCaseIndependent(
+void RegExpMacroAssemblerIA32::CheckNotBackReferenceIgnoreCase(
int start_reg, Label* on_no_match) {
UNIMPLEMENTED();
}
Modified: branches/bleeding_edge/src/regexp-macro-assembler-ia32.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-ia32.h (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-ia32.h Wed Nov 26 23:27:08 2008
@@ -50,8 +50,8 @@
Label* on_failure);
virtual void CheckCurrentPosition(int register_index, Label* on_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
- virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
- Label* on_no_match);
+ virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ Label* on_no_match);
virtual void CheckNotCharacter(uc16 c, Label* on_not_equal);
virtual void CheckNotCharacterAfterOr(uc16 c, uc16 mask, Label* on_not_equal);
virtual void CheckNotCharacterAfterMinusOr(uc16 c,
Modified: branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-irregexp.cc Wed Nov 26 23:27:08 2008
@@ -199,7 +199,7 @@
}
-void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceCaseIndependent(
+void RegExpMacroAssemblerIrregexp::CheckNotBackReferenceIgnoreCase(
int start_reg,
Label* on_not_equal) {
assembler_->CheckNotBackReferenceNoCase(start_reg, on_not_equal);
Modified: branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler-irregexp.h Wed Nov 26 23:27:08 2008
@@ -65,8 +65,8 @@
uc16 mask,
Label* on_not_equal);
virtual void CheckNotBackReference(int start_reg, Label* on_no_match);
- virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
- Label* on_no_match);
+ virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ Label* on_no_match);
virtual void CheckCharacters(Vector<const uc16> str,
int cp_offset,
Label* on_failure);
Modified: branches/bleeding_edge/src/regexp-macro-assembler.h
==============================================================================
--- branches/bleeding_edge/src/regexp-macro-assembler.h (original)
+++ branches/bleeding_edge/src/regexp-macro-assembler.h Wed Nov 26 23:27:08 2008
@@ -76,8 +76,8 @@
int register_index,
Label* on_equal) = 0;
virtual void CheckNotBackReference(int start_reg, Label* on_no_match) = 0;
- virtual void CheckNotBackReferenceCaseIndependent(int start_reg,
- Label* on_no_match) = 0;
+ virtual void CheckNotBackReferenceIgnoreCase(int start_reg,
+ Label* on_no_match) = 0;
// Check the current character for a match with a literal character. If we
// fail to match then goto the on_failure label. End of input always
// matches. If the label is NULL then we should pop a backtrack address off
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---