Author: [EMAIL PROTECTED]
Date: Fri Oct 31 06:44:56 2008
New Revision: 674
Modified:
branches/experimental/regexp2000/src/ast.cc
branches/experimental/regexp2000/src/ast.h
branches/experimental/regexp2000/src/jsregexp.cc
branches/experimental/regexp2000/src/jsregexp.h
branches/experimental/regexp2000/src/objects-debug.cc
branches/experimental/regexp2000/src/objects-inl.h
branches/experimental/regexp2000/src/objects.h
branches/experimental/regexp2000/src/parser.cc
Log:
Use new RegExp parser for syntax checking patterns and selecting atom
matching.
Delay compilation of JSCRE regexps until their first use.
Includes previous change-list, which got committed to the wrong branch.
Modified: branches/experimental/regexp2000/src/ast.cc
==============================================================================
--- branches/experimental/regexp2000/src/ast.cc (original)
+++ branches/experimental/regexp2000/src/ast.cc Fri Oct 31 06:44:56 2008
@@ -190,6 +190,19 @@
FOR_EACH_REG_EXP_NODE_TYPE(MAKE_ACCEPT)
#undef MAKE_ACCEPT
+// Out-of-line definitions of the As<Name>() conversions, generated once for
+// every node type listed in FOR_EACH_REG_EXP_NODE_TYPE:
+//   - RegExpTree::As<Name>() is the base-class version and returns NULL;
+//   - RegExp<Name>::As<Name>() is the version declared on the matching node
+//     class and returns the node itself.
+#define MAKE_CONVERSION(Name)                                   \
+  RegExp##Name* RegExpTree::As##Name() { return NULL; }         \
+  RegExp##Name* RegExp##Name::As##Name() { return this; }
+FOR_EACH_REG_EXP_NODE_TYPE(MAKE_CONVERSION)
+#undef MAKE_CONVERSION
RegExpEmpty RegExpEmpty::kInstance;
Modified: branches/experimental/regexp2000/src/ast.h
==============================================================================
--- branches/experimental/regexp2000/src/ast.h (original)
+++ branches/experimental/regexp2000/src/ast.h Fri Oct 31 06:44:56 2008
@@ -1215,6 +1215,9 @@
virtual ~RegExpTree() { }
virtual void* Accept(RegExpVisitor* visitor, void* data) = 0;
SmartPointer<char> ToString();
+#define MAKE_ASTYPE(Name) virtual RegExp##Name* As##Name();
+ FOR_EACH_REG_EXP_NODE_TYPE(MAKE_ASTYPE)  // One virtual As<Name>() per listed node type.
+#undef MAKE_ASTYPE
};
@@ -1222,6 +1225,7 @@
public:
explicit RegExpDisjunction(ZoneList<RegExpTree*>* nodes) : nodes_(nodes)
{ }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpDisjunction* AsDisjunction();
ZoneList<RegExpTree*>* nodes() { return nodes_; }
private:
ZoneList<RegExpTree*>* nodes_;
@@ -1232,6 +1236,7 @@
public:
explicit RegExpAlternative(ZoneList<RegExpTree*>* nodes) : nodes_(nodes)
{ }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpAlternative* AsAlternative();
ZoneList<RegExpTree*>* nodes() { return nodes_; }
private:
ZoneList<RegExpTree*>* nodes_;
@@ -1246,6 +1251,7 @@
};
explicit RegExpAssertion(Type type) : type_(type) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpAssertion* AsAssertion();
Type type() { return type_; }
private:
Type type_;
@@ -1298,6 +1304,7 @@
: ranges_(ranges),
is_negated_(is_negated) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpCharacterClass* AsCharacterClass();
ZoneList<CharacterRange>* ranges() { return ranges_; }
bool is_negated() { return is_negated_; }
private:
@@ -1310,6 +1317,7 @@
public:
explicit RegExpAtom(Vector<const uc16> data) : data_(data) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpAtom* AsAtom();
Vector<const uc16> data() { return data_; }
private:
Vector<const uc16> data_;
@@ -1324,6 +1332,7 @@
is_greedy_(is_greedy),
body_(body) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpQuantifier* AsQuantifier();
int min() { return min_; }
int max() { return max_; }
bool is_greedy() { return is_greedy_; }
@@ -1344,6 +1353,7 @@
explicit RegExpCapture(RegExpTree* body)
: body_(body) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpCapture* AsCapture();
RegExpTree* body() { return body_; }
private:
RegExpTree* body_;
@@ -1356,6 +1366,7 @@
: body_(body),
is_positive_(is_positive) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpLookahead* AsLookahead();
RegExpTree* body() { return body_; }
bool is_positive() { return is_positive_; }
private:
@@ -1368,6 +1379,7 @@
public:
explicit RegExpBackreference(int index) : index_(index) { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpBackreference* AsBackreference();
int index() { return index_; }
private:
int index_;
@@ -1378,6 +1390,7 @@
public:
RegExpEmpty() { }
virtual void* Accept(RegExpVisitor* visitor, void* data);
+ virtual RegExpEmpty* AsEmpty();
static RegExpEmpty* GetInstance() { return &kInstance; }
private:
static RegExpEmpty kInstance;
Modified: branches/experimental/regexp2000/src/jsregexp.cc
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.cc (original)
+++ branches/experimental/regexp2000/src/jsregexp.cc Fri Oct 31 06:44:56 2008
@@ -38,6 +38,7 @@
#include "top.h"
#include "compilation-cache.h"
#include "string-stream.h"
+#include "parser.h"
// Including pcre.h undefines DEBUG to avoid getting debug output from
// the JSCRE implementation. Make sure to redefine it in debug mode
@@ -176,7 +177,16 @@
}
-unibrow::Predicate<unibrow::RegExpSpecialChar, 128> is_reg_exp_special_char;
+// Builds and throws a SyntaxError for a RegExp.
+// The error is created by Factory::NewSyntaxError from |message| and a
+// two-element array holding |pattern| at index 0 and |error_text| at index 1,
+// and is then thrown with Top::Throw. Despite the "Create" in its name, this
+// function throws; the returned handle wraps whatever Top::Throw returns.
+// |re| is accepted but not used by the body.
+static inline Handle<Object> CreateRegExpException(Handle<JSRegExp> re,
+                                                   Handle<String> pattern,
+                                                   Handle<String> error_text,
+                                                   const char* message) {
+  Handle<JSArray> array = Factory::NewJSArray(2);
+  SetElement(array, 0, pattern);
+  SetElement(array, 1, error_text);
+  Handle<Object> regexp_err = Factory::NewSyntaxError(message, array);
+  return Handle<Object>(Top::Throw(*regexp_err));
+}
Handle<Object> RegExpImpl::Compile(Handle<JSRegExp> re,
@@ -190,15 +200,21 @@
re->set_data(*cached);
result = re;
} else {
- bool is_atom = !flags.is_ignore_case();
- for (int i = 0; is_atom && i < pattern->length(); i++) {
- if (is_reg_exp_special_char.get(pattern->Get(i)))
- is_atom = false;
- }
- if (is_atom) {
- result = AtomCompile(re, pattern, flags);
+ SafeStringInputBuffer buffer(pattern.location());
+ Handle<String> error_text;
+ RegExpTree* ast = ParseRegExp(&buffer, &error_text);
+ if (!error_text.is_null()) {
+ // Throw an exception if we fail to parse the pattern.
+ return CreateRegExpException(re, pattern, error_text, "malformed_regexp");
+ }
+ RegExpAtom* atom = ast->AsAtom();
+ if (atom != NULL && !flags.is_ignore_case()) {
+ Vector<const uc16> atom_pattern = atom->data();
+ // TODO: Test if pattern equals atom_pattern and reuse pattern if it does.
+ Handle<String> atom_string = Factory::NewStringFromTwoByte(atom_pattern);
+ result = AtomCompile(re, atom_string, flags);
} else {
- result = JsreCompile(re, pattern, flags);
+ result = JsrePrepare(re, pattern, flags);
}
Object* data = re->data();
if (data->IsFixedArray()) {
@@ -311,9 +327,22 @@
}
-Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re,
- Handle<String> pattern,
- JSRegExp::Flags flags) {
+// Stores |pattern| and |flags| on |re| with type JSRegExp::JSCRE without
+// running the JSCRE compiler: the last argument handed to
+// Factory::SetRegExpData is a handle to the undefined value.
+// Returns |re|.
+Handle<Object> RegExpImpl::JsrePrepare(Handle<JSRegExp> re,
+                                       Handle<String> pattern,
+                                       JSRegExp::Flags flags) {
+  Handle<Object> uncompiled_marker(Heap::undefined_value());
+  Factory::SetRegExpData(re, JSRegExp::JSCRE, pattern, flags,
+                         uncompiled_marker);
+  return re;
+}
+
+
+Handle<Object> RegExpImpl::JsreCompile(Handle<JSRegExp> re) {
+ ASSERT_EQ(re->TypeTag(), JSRegExp::JSCRE);
+ ASSERT(re->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined());
+
+ Handle<String> pattern(re->Pattern());
+ JSRegExp::Flags flags = re->GetFlags();
+
JSRegExpIgnoreCaseOption case_option = flags.is_ignore_case()
? JSRegExpIgnoreCase
: JSRegExpDoNotIgnoreCase;
@@ -477,6 +506,13 @@
Handle<Object> RegExpImpl::JsreExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index) {
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
+ if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
+ Handle<Object> compile_result = JsreCompile(regexp);
+ if (compile_result->IsException()) return compile_result;
+ }
+ ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
+
// Prepare space for the return values.
int num_captures = JsreCapture(regexp);
@@ -497,6 +533,13 @@
Handle<Object> RegExpImpl::JsreExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject) {
+ ASSERT_EQ(regexp->TypeTag(), JSRegExp::JSCRE);
+ if (regexp->DataAt(JSRegExp::kJscreDataIndex)->IsUndefined()) {
+ Handle<Object> compile_result = JsreCompile(regexp);
+ if (compile_result->IsException()) return compile_result;
+ }
+ ASSERT(regexp->DataAt(JSRegExp::kJscreDataIndex)->IsFixedArray());
+
// Prepare space for the return values.
int num_captures = JsreCapture(regexp);
@@ -898,7 +941,7 @@
StaticCharacterClasses::StaticCharacterClasses() {
#define MAKE_CLASS(Name)\
- CharacterClass::Ranges(Vector<CharacterClass::Range>(k##Name##Ranges,\
+ CharacterClass::Ranges(Vector<CharacterClass::Range>(k##Name##Ranges, \
k##Name##RangeCount), \
&static_allocator_)
Modified: branches/experimental/regexp2000/src/jsregexp.h
==============================================================================
--- branches/experimental/regexp2000/src/jsregexp.h (original)
+++ branches/experimental/regexp2000/src/jsregexp.h Fri Oct 31 06:44:56 2008
@@ -61,10 +61,19 @@
static Handle<Object> ExecGlobal(Handle<JSRegExp> regexp,
Handle<String> subject);
- static Handle<Object> AtomCompile(Handle<JSRegExp> re,
+ // Stores an uncompiled RegExp pattern in the JSRegExp object.
+ // It will be compiled by JSCRE when first executed.
+ static Handle<Object> JsrePrepare(Handle<JSRegExp> re,
Handle<String> pattern,
JSRegExp::Flags flags);
+ // Compile the pattern using JSCRE and store the result in the
+ // JSRegExp object.
+ static Handle<Object> JsreCompile(Handle<JSRegExp> re);
+
+ static Handle<Object> AtomCompile(Handle<JSRegExp> re,
+ Handle<String> pattern,
+ JSRegExp::Flags flags);
static Handle<Object> AtomExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
@@ -76,6 +85,7 @@
Handle<String> pattern,
JSRegExp::Flags flags);
+ // Execute a JSCRE pattern, compiling it on first use if needed.
static Handle<Object> JsreExec(Handle<JSRegExp> regexp,
Handle<String> subject,
Handle<Object> index);
@@ -194,7 +204,7 @@
static const int kFieldMax = (1 << kFieldWidth);
static const int kSegmentMask = (1 << kFieldWidth) - 1;
static const int kNibbleCount = kFieldMax / 4;
- STATIC_ASSERT(kFieldMax == 8 * sizeof(uint64_t));
+ STATIC_CHECK(kFieldMax == 8 * sizeof(uint64_t));
Type type() { return type_; }
@@ -218,7 +228,7 @@
};
-STATIC_ASSERT(sizeof(CharacterClass) == 3 * kIntSize);
+STATIC_CHECK(sizeof(CharacterClass) == 3 * kIntSize);
class CharacterClassAllocator {
Modified: branches/experimental/regexp2000/src/objects-debug.cc
==============================================================================
--- branches/experimental/regexp2000/src/objects-debug.cc (original)
+++ branches/experimental/regexp2000/src/objects-debug.cc Fri Oct 31 06:44:56 2008
@@ -669,7 +669,8 @@
}
case JSRegExp::JSCRE: {
FixedArray* arr = FixedArray::cast(data());
- ASSERT(arr->get(JSRegExp::kJscreDataIndex)->IsFixedArray());
+ Object* jscre_data = arr->get(JSRegExp::kJscreDataIndex);
+ ASSERT(jscre_data->IsFixedArray() || jscre_data->IsUndefined());
break;
}
default:
Modified: branches/experimental/regexp2000/src/objects-inl.h
==============================================================================
--- branches/experimental/regexp2000/src/objects-inl.h (original)
+++ branches/experimental/regexp2000/src/objects-inl.h Fri Oct 31 06:44:56 2008
@@ -2149,6 +2149,22 @@
}
+// Returns the flags of this regexp, decoded from the Smi stored at
+// kFlagsIndex of the data array. The data must already be a FixedArray.
+JSRegExp::Flags JSRegExp::GetFlags() {
+  ASSERT(this->data()->IsFixedArray());
+  FixedArray* fields = FixedArray::cast(this->data());
+  Smi* encoded_flags = Smi::cast(fields->get(kFlagsIndex));
+  return Flags(encoded_flags->value());
+}
+
+
+// Returns the string stored at kSourceIndex of the data array.
+// The data must already be a FixedArray.
+String* JSRegExp::Pattern() {
+  ASSERT(this->data()->IsFixedArray());
+  FixedArray* fields = FixedArray::cast(this->data());
+  return String::cast(fields->get(kSourceIndex));
+}
+
+
Object* JSRegExp::DataAt(int index) {
ASSERT(TypeTag() != NOT_COMPILED);
return FixedArray::cast(data())->get(index);
Modified: branches/experimental/regexp2000/src/objects.h
==============================================================================
--- branches/experimental/regexp2000/src/objects.h (original)
+++ branches/experimental/regexp2000/src/objects.h Fri Oct 31 06:44:56 2008
@@ -2915,6 +2915,10 @@
// Regular expressions
class JSRegExp: public JSObject {
public:
+ // Meaning of Type:
+ // NOT_COMPILED: Initial value. No data has been stored in the JSRegExp yet.
+ // JSCRE: A complex RegExp for JSCRE
+ // ATOM: A simple string to match against using an indexOf operation.
enum Type { NOT_COMPILED, JSCRE, ATOM };
enum Flag { NONE = 0, GLOBAL = 1, IGNORE_CASE = 2, MULTILINE = 4 };
@@ -2932,6 +2936,8 @@
DECL_ACCESSORS(data, Object)
inline Type TypeTag();
+ inline Flags GetFlags();
+ inline String* Pattern();
inline Object* DataAt(int index);
static inline JSRegExp* cast(Object* obj);
Modified: branches/experimental/regexp2000/src/parser.cc
==============================================================================
--- branches/experimental/regexp2000/src/parser.cc (original)
+++ branches/experimental/regexp2000/src/parser.cc Fri Oct 31 06:44:56 2008
@@ -3820,6 +3820,7 @@
RegExpTree* RegExpParser::ParseCharacterClass(bool* ok) {
static const char* kUnterminated = "Unterminated character class";
static const char* kIllegal = "Illegal character class";
+ static const char* kRangeOutOfOrder = "Range out of order in character class";
ASSERT_EQ(current(), '[');
Advance();
@@ -3840,6 +3841,9 @@
CharacterRange next = ParseClassAtom(CHECK_OK);
if (next.is_character_class()) {
return ReportError(CStrVector(kIllegal), CHECK_OK);
+ }
+ if (first.from() > next.to()) {
+ return ReportError(CStrVector(kRangeOutOfOrder), CHECK_OK);
}
ranges->Add(CharacterRange::Range(first.from(), next.to()));
} else {
--~--~---------~--~----~------------~-------~--~----~
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev
-~----------~----~----~----~------~----~------~--~---