Revision: 5719
Author: [email protected]
Date: Wed Oct 27 05:33:48 2010
Log: Make Parser class have no friends and fewer things to depend on it.
Review URL: http://codereview.chromium.org/4146006
http://code.google.com/p/v8/source/detail?r=5719
Modified:
/branches/bleeding_edge/src/api.cc
/branches/bleeding_edge/src/compiler.cc
/branches/bleeding_edge/src/jsregexp.cc
/branches/bleeding_edge/src/liveedit.cc
/branches/bleeding_edge/src/parser.cc
/branches/bleeding_edge/src/parser.h
/branches/bleeding_edge/test/cctest/test-regexp.cc
=======================================
--- /branches/bleeding_edge/src/api.cc Mon Oct 25 08:22:03 2010
+++ /branches/bleeding_edge/src/api.cc Wed Oct 27 05:33:48 2010
@@ -1155,13 +1155,13 @@
ScriptData* ScriptData::PreCompile(const char* input, int length) {
unibrow::Utf8InputBuffer<> buf(input, length);
- return i::Parser::PreParse(i::Handle<i::String>(), &buf, NULL);
+ return i::ParserApi::PreParse(i::Handle<i::String>(), &buf, NULL);
}
ScriptData* ScriptData::PreCompile(v8::Handle<String> source) {
i::Handle<i::String> str = Utils::OpenHandle(*source);
- return i::Parser::PreParse(str, NULL, NULL);
+ return i::ParserApi::PreParse(str, NULL, NULL);
}
=======================================
--- /branches/bleeding_edge/src/compiler.cc Wed Oct 27 02:19:43 2010
+++ /branches/bleeding_edge/src/compiler.cc Wed Oct 27 05:33:48 2010
@@ -176,7 +176,7 @@
// Only allow non-global compiles for eval.
ASSERT(info->is_eval() || info->is_global());
- if (!Parser::Parse(info)) return Handle<SharedFunctionInfo>::null();
+ if (!ParserApi::Parse(info)) return Handle<SharedFunctionInfo>::null();
// Measure how long it takes to do the compilation; only take the
// rest of the function into account to avoid overlap with the
@@ -281,7 +281,7 @@
if (pre_data == NULL
&& FLAG_lazy
&& source_length >= FLAG_min_preparse_length) {
- pre_data = Parser::PartialPreParse(source, NULL, extension);
+ pre_data = ParserApi::PartialPreParse(source, NULL, extension);
}
// Create a script object describing the script to be compiled.
@@ -364,7 +364,7 @@
Counters::total_compile_size.Increment(compiled_size);
// Generate the AST for the lazily compiled function.
- if (Parser::Parse(info)) {
+ if (ParserApi::Parse(info)) {
// Measure how long it takes to do the lazy compilation; only take the
// rest of the function into account to avoid overlap with the lazy
// parsing statistics.
=======================================
--- /branches/bleeding_edge/src/jsregexp.cc Tue Oct 19 07:00:01 2010
+++ /branches/bleeding_edge/src/jsregexp.cc Wed Oct 27 05:33:48 2010
@@ -125,7 +125,8 @@
PostponeInterruptsScope postpone;
RegExpCompileData parse_result;
FlatStringReader reader(pattern);
- if (!Parser::ParseRegExp(&reader, flags.is_multiline(), &parse_result)) {
+ if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
+ &parse_result)) {
// Throw an exception if we fail to parse the pattern.
ThrowRegExpException(re,
pattern,
@@ -267,7 +268,8 @@
RegExpCompileData compile_data;
FlatStringReader reader(pattern);
- if (!Parser::ParseRegExp(&reader, flags.is_multiline(), &compile_data)) {
+ if (!RegExpParser::ParseRegExp(&reader, flags.is_multiline(),
+ &compile_data)) {
// Throw an exception if we fail to parse the pattern.
// THIS SHOULD NOT HAPPEN. We already pre-parsed it successfully once.
ThrowRegExpException(re,
=======================================
--- /branches/bleeding_edge/src/liveedit.cc Mon Oct 25 08:22:03 2010
+++ /branches/bleeding_edge/src/liveedit.cc Wed Oct 27 05:33:48 2010
@@ -404,7 +404,7 @@
// Build AST.
CompilationInfo info(script);
info.MarkAsGlobal();
- if (Parser::Parse(&info)) {
+ if (ParserApi::Parse(&info)) {
// Compile the code.
LiveEditFunctionTracker tracker(info.function());
if (Compiler::MakeCodeForLiveEdit(&info)) {
=======================================
--- /branches/bleeding_edge/src/parser.cc Wed Oct 27 02:19:43 2010
+++ /branches/bleeding_edge/src/parser.cc Wed Oct 27 05:33:48 2010
@@ -87,112 +87,6 @@
};
-template <typename T, int initial_size>
-class BufferedZoneList {
- public:
- BufferedZoneList() : list_(NULL), last_(NULL) {}
-
- // Adds element at end of list. This element is buffered and can
- // be read using last() or removed using RemoveLast until a new Add or until
- // RemoveLast or GetList has been called.
- void Add(T* value) {
- if (last_ != NULL) {
- if (list_ == NULL) {
- list_ = new ZoneList<T*>(initial_size);
- }
- list_->Add(last_);
- }
- last_ = value;
- }
-
- T* last() {
- ASSERT(last_ != NULL);
- return last_;
- }
-
- T* RemoveLast() {
- ASSERT(last_ != NULL);
- T* result = last_;
- if (list_ != NULL && list_->length() > 0)
- last_ = list_->RemoveLast();
- else
- last_ = NULL;
- return result;
- }
-
- T* Get(int i) {
- ASSERT(0 <= i && i < length());
- if (list_ == NULL) {
- ASSERT_EQ(0, i);
- return last_;
- } else {
- if (i == list_->length()) {
- ASSERT(last_ != NULL);
- return last_;
- } else {
- return list_->at(i);
- }
- }
- }
-
- void Clear() {
- list_ = NULL;
- last_ = NULL;
- }
-
- int length() {
- int length = (list_ == NULL) ? 0 : list_->length();
- return length + ((last_ == NULL) ? 0 : 1);
- }
-
- ZoneList<T*>* GetList() {
- if (list_ == NULL) {
- list_ = new ZoneList<T*>(initial_size);
- }
- if (last_ != NULL) {
- list_->Add(last_);
- last_ = NULL;
- }
- return list_;
- }
-
- private:
- ZoneList<T*>* list_;
- T* last_;
-};
-
-
-// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
-class RegExpBuilder: public ZoneObject {
- public:
- RegExpBuilder();
- void AddCharacter(uc16 character);
- // "Adds" an empty expression. Does nothing except consume a
- // following quantifier
- void AddEmpty();
- void AddAtom(RegExpTree* tree);
- void AddAssertion(RegExpTree* tree);
- void NewAlternative(); // '|'
- void AddQuantifierToAtom(int min, int max, RegExpQuantifier::Type type);
- RegExpTree* ToRegExp();
- private:
- void FlushCharacters();
- void FlushText();
- void FlushTerms();
- bool pending_empty_;
- ZoneList<uc16>* characters_;
- BufferedZoneList<RegExpTree, 2> terms_;
- BufferedZoneList<RegExpTree, 2> text_;
- BufferedZoneList<RegExpTree, 2> alternatives_;
-#ifdef DEBUG
- enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
-#define LAST(x) last_added_ = x;
-#else
-#define LAST(x)
-#endif
-};
-
-
RegExpBuilder::RegExpBuilder()
: pending_empty_(false),
characters_(NULL),
@@ -352,124 +246,13 @@
}
-class RegExpParser {
- public:
- RegExpParser(FlatStringReader* in,
- Handle<String>* error,
- bool multiline_mode);
- RegExpTree* ParsePattern();
- RegExpTree* ParseDisjunction();
- RegExpTree* ParseGroup();
- RegExpTree* ParseCharacterClass();
-
- // Parses a {...,...} quantifier and stores the range in the given
- // out parameters.
- bool ParseIntervalQuantifier(int* min_out, int* max_out);
-
- // Parses and returns a single escaped character. The character
- // must not be 'b' or 'B' since they are usually handle specially.
- uc32 ParseClassCharacterEscape();
-
- // Checks whether the following is a length-digit hexadecimal number,
- // and sets the value if it is.
- bool ParseHexEscape(int length, uc32* value);
-
- uc32 ParseControlLetterEscape();
- uc32 ParseOctalLiteral();
-
- // Tries to parse the input as a back reference. If successful it
- // stores the result in the output parameter and returns true. If
- // it fails it will push back the characters read so the same characters
- // can be reparsed.
- bool ParseBackReferenceIndex(int* index_out);
-
- CharacterRange ParseClassAtom(uc16* char_class);
- RegExpTree* ReportError(Vector<const char> message);
- void Advance();
- void Advance(int dist);
- void Reset(int pos);
-
- // Reports whether the pattern might be used as a literal search string.
- // Only use if the result of the parse is a single atom node.
- bool simple();
- bool contains_anchor() { return contains_anchor_; }
- void set_contains_anchor() { contains_anchor_ = true; }
- int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
- int position() { return next_pos_ - 1; }
- bool failed() { return failed_; }
-
- static const int kMaxCaptures = 1 << 16;
- static const uc32 kEndMarker = (1 << 21);
-
- private:
- enum SubexpressionType {
- INITIAL,
- CAPTURE, // All positive values represent captures.
- POSITIVE_LOOKAHEAD,
- NEGATIVE_LOOKAHEAD,
- GROUPING
- };
-
- class RegExpParserState : public ZoneObject {
- public:
- RegExpParserState(RegExpParserState* previous_state,
- SubexpressionType group_type,
- int disjunction_capture_index)
- : previous_state_(previous_state),
- builder_(new RegExpBuilder()),
- group_type_(group_type),
- disjunction_capture_index_(disjunction_capture_index) {}
- // Parser state of containing expression, if any.
- RegExpParserState* previous_state() { return previous_state_; }
- bool IsSubexpression() { return previous_state_ != NULL; }
- // RegExpBuilder building this regexp's AST.
- RegExpBuilder* builder() { return builder_; }
- // Type of regexp being parsed (parenthesized group or entire regexp).
- SubexpressionType group_type() { return group_type_; }
- // Index in captures array of first capture in this sub-expression, if any.
- // Also the capture index of this sub-expression itself, if group_type
- // is CAPTURE.
- int capture_index() { return disjunction_capture_index_; }
- private:
- // Linked list implementation of stack of states.
- RegExpParserState* previous_state_;
- // Builder for the stored disjunction.
- RegExpBuilder* builder_;
- // Stored disjunction type (capture, look-ahead or grouping), if any.
- SubexpressionType group_type_;
- // Stored disjunction's capture index (if any).
- int disjunction_capture_index_;
- };
-
- uc32 current() { return current_; }
- bool has_more() { return has_more_; }
- bool has_next() { return next_pos_ < in()->length(); }
- uc32 Next();
- FlatStringReader* in() { return in_; }
- void ScanForCaptures();
- uc32 current_;
- bool has_more_;
- bool multiline_;
- int next_pos_;
- FlatStringReader* in_;
- Handle<String>* error_;
- bool simple_;
- bool contains_anchor_;
- ZoneList<RegExpCapture*>* captures_;
- bool is_scanned_for_captures_;
- // The capture count is only valid after we have scanned for captures.
- int capture_count_;
- bool failed_;
-};
-
-
// A temporary scope stores information during parsing, just like
// a plain scope. However, temporary scopes are not kept around
// after parsing or referenced by syntax trees so they can be stack-
// allocated and hence used by the pre-parser.
class TemporaryScope BASE_EMBEDDED {
public:
- explicit TemporaryScope(Parser* parser);
+ explicit TemporaryScope(TemporaryScope** variable);
~TemporaryScope();
int NextMaterializedLiteralIndex() {
@@ -518,27 +301,25 @@
int loop_count_;
// Bookkeeping
- Parser* parser_;
+ TemporaryScope** variable_;
TemporaryScope* parent_;
-
- friend class Parser;
};
-TemporaryScope::TemporaryScope(Parser* parser)
+TemporaryScope::TemporaryScope(TemporaryScope** variable)
: materialized_literal_count_(0),
expected_property_count_(0),
only_simple_this_property_assignments_(false),
this_property_assignments_(Factory::empty_fixed_array()),
loop_count_(0),
- parser_(parser),
- parent_(parser->temp_scope_) {
- parser->temp_scope_ = this;
+ variable_(variable),
+ parent_(*variable) {
+ *variable = this;
}
TemporaryScope::~TemporaryScope() {
- parser_->temp_scope_ = parent_;
+ *variable_ = parent_;
}
@@ -1141,20 +922,20 @@
class Target BASE_EMBEDDED {
public:
- Target(Parser* parser, AstNode* node)
- : parser_(parser), node_(node), previous_(parser_->target_stack_) {
- parser_->target_stack_ = this;
+ Target(Target** variable, AstNode* node)
+ : variable_(variable), node_(node), previous_(*variable) {
+ *variable = this;
}
~Target() {
- parser_->target_stack_ = previous_;
+ *variable_ = previous_;
}
Target* previous() { return previous_; }
AstNode* node() { return node_; }
private:
- Parser* parser_;
+ Target** variable_;
AstNode* node_;
Target* previous_;
};
@@ -1162,17 +943,17 @@
class TargetScope BASE_EMBEDDED {
public:
- explicit TargetScope(Parser* parser)
- : parser_(parser), previous_(parser->target_stack_) {
- parser->target_stack_ = NULL;
+ explicit TargetScope(Target** variable)
+ : variable_(variable), previous_(*variable) {
+ *variable = NULL;
}
~TargetScope() {
- parser_->target_stack_ = previous_;
+ *variable_ = previous_;
}
private:
- Parser* parser_;
+ Target** variable_;
Target* previous_;
};
@@ -1184,22 +965,26 @@
class LexicalScope BASE_EMBEDDED {
public:
- LexicalScope(Parser* parser, Scope* scope)
- : parser_(parser),
- prev_scope_(parser->top_scope_),
- prev_level_(parser->with_nesting_level_) {
- parser_->top_scope_ = scope;
- parser_->with_nesting_level_ = 0;
+ LexicalScope(Scope** scope_variable,
+ int* with_nesting_level_variable,
+ Scope* scope)
+ : scope_variable_(scope_variable),
+ with_nesting_level_variable_(with_nesting_level_variable),
+ prev_scope_(*scope_variable),
+ prev_level_(*with_nesting_level_variable) {
+ *scope_variable = scope;
+ *with_nesting_level_variable = 0;
}
~LexicalScope() {
- parser_->top_scope_->Leave();
- parser_->top_scope_ = prev_scope_;
- parser_->with_nesting_level_ = prev_level_;
+ (*scope_variable_)->Leave();
+ *scope_variable_ = prev_scope_;
+ *with_nesting_level_variable_ = prev_level_;
}
private:
- Parser* parser_;
+ Scope** scope_variable_;
+ int* with_nesting_level_variable_;
Scope* prev_scope_;
int prev_level_;
};
@@ -1262,8 +1047,8 @@
mode_ = FLAG_lazy ? PARSE_LAZILY : PARSE_EAGERLY;
if (allow_natives_syntax_ || extension_ != NULL) mode_ = PARSE_EAGERLY;
DummyScope top_scope;
- LexicalScope scope(this, &top_scope);
- TemporaryScope temp_scope(this);
+ LexicalScope scope(&this->top_scope_, &this->with_nesting_level_, &top_scope);
+ TemporaryScope temp_scope(&this->temp_scope_);
ZoneListWrapper<Statement> processor;
bool ok = true;
ParseSourceElements(&processor, Token::EOS, &ok);
@@ -1297,8 +1082,9 @@
FunctionLiteral* result = NULL;
{ Scope* scope = factory()->NewScope(top_scope_, type, inside_with());
- LexicalScope lexical_scope(this, scope);
- TemporaryScope temp_scope(this);
+ LexicalScope lexical_scope(&this->top_scope_, &this->with_nesting_level_,
+ scope);
+ TemporaryScope temp_scope(&this->temp_scope_);
ZoneListWrapper<Statement> body(16);
bool ok = true;
ParseSourceElements(&body, Token::EOS, &ok);
@@ -1356,8 +1142,9 @@
Handle<String> no_name = factory()->EmptySymbol();
Scope* scope =
factory()->NewScope(top_scope_, Scope::GLOBAL_SCOPE, inside_with());
- LexicalScope lexical_scope(this, scope);
- TemporaryScope temp_scope(this);
+ LexicalScope lexical_scope(&this->top_scope_, &this->with_nesting_level_,
+ scope);
+ TemporaryScope temp_scope(&this->temp_scope_);
FunctionLiteralType type =
info->is_expression() ? EXPRESSION : DECLARATION;
@@ -1683,7 +1470,7 @@
// elements. This way, all scripts and functions get their own
// target stack thus avoiding illegal breaks and continues across
// functions.
- TargetScope scope(this);
+ TargetScope scope(&this->target_stack_);
ASSERT(processor != NULL);
InitializationBlockFinder block_finder;
@@ -1807,7 +1594,7 @@
// fall-through. It is much easier just to wrap the entire
// try-statement in a statement block and put the labels there
Block* result = NEW(Block(labels, 1, false));
- Target target(this, result);
+ Target target(&this->target_stack_, result);
TryStatement* statement = ParseTryStatement(CHECK_OK);
if (statement) {
statement->set_statement_pos(statement_pos);
@@ -2023,7 +1810,7 @@
//
// Construct block expecting 16 statements.
Block* result = NEW(Block(labels, 16, false));
- Target target(this, result);
+ Target target(&this->target_stack_, result);
Expect(Token::LBRACE, CHECK_OK);
while (peek() != Token::RBRACE) {
Statement* stat = ParseStatement(NULL, CHECK_OK);
@@ -2418,7 +2205,7 @@
ZoneList<BreakTarget*>* target_list = NEW(ZoneList<BreakTarget*>(0));
TargetCollector collector(target_list);
Statement* stat;
- { Target target(this, &collector);
+ { Target target(&this->target_stack_, &collector);
with_nesting_level_++;
top_scope_->RecordWithStatement();
stat = ParseStatement(labels, CHECK_OK);
@@ -2501,7 +2288,7 @@
// 'switch' '(' Expression ')' '{' CaseClause* '}'
SwitchStatement* statement = NEW(SwitchStatement(labels));
- Target target(this, statement);
+ Target target(&this->target_stack_, statement);
Expect(Token::SWITCH, CHECK_OK);
Expect(Token::LPAREN, CHECK_OK);
@@ -2558,7 +2345,7 @@
TargetCollector collector(target_list);
Block* try_block;
- { Target target(this, &collector);
+ { Target target(&this->target_stack_, &collector);
try_block = ParseBlock(NULL, CHECK_OK);
}
@@ -2594,7 +2381,7 @@
catch_var = top_scope_->NewTemporary(Factory::catch_var_symbol());
Literal* name_literal = NEW(Literal(name));
Expression* obj = NEW(CatchExtensionObject(name_literal, catch_var));
- { Target target(this, &catch_collector);
+ { Target target(&this->target_stack_, &catch_collector);
catch_block = WithHelper(obj, NULL, true, CHECK_OK);
}
} else {
@@ -2653,7 +2440,7 @@
temp_scope_->AddLoop();
DoWhileStatement* loop = NEW(DoWhileStatement(labels));
- Target target(this, loop);
+ Target target(&this->target_stack_, loop);
Expect(Token::DO, CHECK_OK);
Statement* body = ParseStatement(NULL, CHECK_OK);
@@ -2686,7 +2473,7 @@
temp_scope_->AddLoop();
WhileStatement* loop = NEW(WhileStatement(labels));
- Target target(this, loop);
+ Target target(&this->target_stack_, loop);
Expect(Token::WHILE, CHECK_OK);
Expect(Token::LPAREN, CHECK_OK);
@@ -2716,7 +2503,7 @@
ParseVariableDeclarations(false, &each, CHECK_OK);
if (peek() == Token::IN && each != NULL) {
ForInStatement* loop = NEW(ForInStatement(labels));
- Target target(this, loop);
+ Target target(&this->target_stack_, loop);
Expect(Token::IN, CHECK_OK);
Expression* enumerable = ParseExpression(true, CHECK_OK);
@@ -2750,7 +2537,7 @@
expression = NewThrowReferenceError(type);
}
ForInStatement* loop = NEW(ForInStatement(labels));
- Target target(this, loop);
+ Target target(&this->target_stack_, loop);
Expect(Token::IN, CHECK_OK);
Expression* enumerable = ParseExpression(true, CHECK_OK);
@@ -2769,7 +2556,7 @@
// Standard 'for' loop
ForStatement* loop = NEW(ForStatement(labels));
- Target target(this, loop);
+ Target target(&this->target_stack_, loop);
// Parsed initializer at this point.
Expect(Token::SEMICOLON, CHECK_OK);
@@ -3859,8 +3646,9 @@
// Parse function body.
{ Scope* scope =
factory()->NewScope(top_scope_, Scope::FUNCTION_SCOPE, inside_with());
- LexicalScope lexical_scope(this, scope);
- TemporaryScope temp_scope(this);
+ LexicalScope lexical_scope(&this->top_scope_, &this->with_nesting_level_,
+ scope);
+ TemporaryScope temp_scope(&this->temp_scope_);
top_scope_->SetScopeName(name);
// FormalParameterList ::
@@ -5217,9 +5005,9 @@
// Preparse, but only collect data that is immediately useful,
// even if the preparser data is only used once.
-ScriptDataImpl* Parser::PartialPreParse(Handle<String> source,
- unibrow::CharacterStream* stream,
- v8::Extension* extension) {
+ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
+ unibrow::CharacterStream* stream,
+ v8::Extension* extension) {
Handle<Script> no_script;
bool allow_natives_syntax =
FLAG_allow_natives_syntax || Bootstrapper::IsActive();
@@ -5275,9 +5063,9 @@
}
-ScriptDataImpl* Parser::PreParse(Handle<String> source,
- unibrow::CharacterStream* stream,
- v8::Extension* extension) {
+ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
+ unibrow::CharacterStream* stream,
+ v8::Extension* extension) {
Handle<Script> no_script;
bool allow_natives_syntax =
FLAG_allow_natives_syntax || Bootstrapper::IsActive();
@@ -5290,9 +5078,9 @@
}
-bool Parser::ParseRegExp(FlatStringReader* input,
- bool multiline,
- RegExpCompileData* result) {
+bool RegExpParser::ParseRegExp(FlatStringReader* input,
+ bool multiline,
+ RegExpCompileData* result) {
ASSERT(result != NULL);
RegExpParser parser(input, &result->error, multiline);
RegExpTree* tree = parser.ParsePattern();
@@ -5312,7 +5100,7 @@
}
-bool Parser::Parse(CompilationInfo* info) {
+bool ParserApi::Parse(CompilationInfo* info) {
ASSERT(info->function() == NULL);
FunctionLiteral* result = NULL;
Handle<Script> script = info->script();
=======================================
--- /branches/bleeding_edge/src/parser.h Wed Oct 27 02:19:43 2010
+++ /branches/bleeding_edge/src/parser.h Wed Oct 27 05:33:48 2010
@@ -177,13 +177,8 @@
};
-class Parser {
+class ParserApi {
public:
- Parser(Handle<Script> script, bool allow_natives_syntax,
- v8::Extension* extension, ParserMode is_pre_parsing,
- ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data);
- virtual ~Parser() { }
-
// Parses the source code represented by the compilation info and sets its
// function literal. Returns false (and deallocates any allocated AST
// nodes) if parsing failed.
@@ -199,10 +194,245 @@
static ScriptDataImpl* PartialPreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension);
+};
+
+
+// A BufferedZoneList is an automatically growing list, just like (and backed
+// by) a ZoneList, that is optimized for the case of adding and removing
+// a single element. The last element added is stored outside the backing list,
+// and if no more than one element is ever added, the ZoneList isn't even
+// allocated.
+// Elements must not be NULL pointers.
+template <typename T, int initial_size>
+class BufferedZoneList {
+ public:
+ BufferedZoneList() : list_(NULL), last_(NULL) {}
+
+ // Adds element at end of list. This element is buffered and can
+ // be read using last() or removed using RemoveLast until a new Add or until
+ // RemoveLast or GetList has been called.
+ void Add(T* value) {
+ if (last_ != NULL) {
+ if (list_ == NULL) {
+ list_ = new ZoneList<T*>(initial_size);
+ }
+ list_->Add(last_);
+ }
+ last_ = value;
+ }
+
+ T* last() {
+ ASSERT(last_ != NULL);
+ return last_;
+ }
+
+ T* RemoveLast() {
+ ASSERT(last_ != NULL);
+ T* result = last_;
+ if ((list_ != NULL) && (list_->length() > 0))
+ last_ = list_->RemoveLast();
+ else
+ last_ = NULL;
+ return result;
+ }
+
+ T* Get(int i) {
+ ASSERT((0 <= i) && (i < length()));
+ if (list_ == NULL) {
+ ASSERT_EQ(0, i);
+ return last_;
+ } else {
+ if (i == list_->length()) {
+ ASSERT(last_ != NULL);
+ return last_;
+ } else {
+ return list_->at(i);
+ }
+ }
+ }
+
+ void Clear() {
+ list_ = NULL;
+ last_ = NULL;
+ }
+
+ int length() {
+ int length = (list_ == NULL) ? 0 : list_->length();
+ return length + ((last_ == NULL) ? 0 : 1);
+ }
+
+ ZoneList<T*>* GetList() {
+ if (list_ == NULL) {
+ list_ = new ZoneList<T*>(initial_size);
+ }
+ if (last_ != NULL) {
+ list_->Add(last_);
+ last_ = NULL;
+ }
+ return list_;
+ }
+
+ private:
+ ZoneList<T*>* list_;
+ T* last_;
+};
+
+
+// Accumulates RegExp atoms and assertions into lists of terms and alternatives.
+class RegExpBuilder: public ZoneObject {
+ public:
+ RegExpBuilder();
+ void AddCharacter(uc16 character);
+ // "Adds" an empty expression. Does nothing except consume a
+ // following quantifier
+ void AddEmpty();
+ void AddAtom(RegExpTree* tree);
+ void AddAssertion(RegExpTree* tree);
+ void NewAlternative(); // '|'
+ void AddQuantifierToAtom(int min, int max, RegExpQuantifier::Type type);
+ RegExpTree* ToRegExp();
+
+ private:
+ void FlushCharacters();
+ void FlushText();
+ void FlushTerms();
+ bool pending_empty_;
+ ZoneList<uc16>* characters_;
+ BufferedZoneList<RegExpTree, 2> terms_;
+ BufferedZoneList<RegExpTree, 2> text_;
+ BufferedZoneList<RegExpTree, 2> alternatives_;
+#ifdef DEBUG
+ enum {ADD_NONE, ADD_CHAR, ADD_TERM, ADD_ASSERT, ADD_ATOM} last_added_;
+#define LAST(x) last_added_ = x;
+#else
+#define LAST(x)
+#endif
+};
+
+
+class RegExpParser {
+ public:
+ RegExpParser(FlatStringReader* in,
+ Handle<String>* error,
+ bool multiline_mode);
static bool ParseRegExp(FlatStringReader* input,
bool multiline,
RegExpCompileData* result);
+
+ RegExpTree* ParsePattern();
+ RegExpTree* ParseDisjunction();
+ RegExpTree* ParseGroup();
+ RegExpTree* ParseCharacterClass();
+
+ // Parses a {...,...} quantifier and stores the range in the given
+ // out parameters.
+ bool ParseIntervalQuantifier(int* min_out, int* max_out);
+
+ // Parses and returns a single escaped character. The character
+ // must not be 'b' or 'B' since they are usually handled specially.
+ uc32 ParseClassCharacterEscape();
+
+ // Checks whether the following is a length-digit hexadecimal number,
+ // and sets the value if it is.
+ bool ParseHexEscape(int length, uc32* value);
+
+ uc32 ParseControlLetterEscape();
+ uc32 ParseOctalLiteral();
+
+ // Tries to parse the input as a back reference. If successful it
+ // stores the result in the output parameter and returns true. If
+ // it fails it will push back the characters read so the same characters
+ // can be reparsed.
+ bool ParseBackReferenceIndex(int* index_out);
+
+ CharacterRange ParseClassAtom(uc16* char_class);
+ RegExpTree* ReportError(Vector<const char> message);
+ void Advance();
+ void Advance(int dist);
+ void Reset(int pos);
+
+ // Reports whether the pattern might be used as a literal search string.
+ // Only use if the result of the parse is a single atom node.
+ bool simple();
+ bool contains_anchor() { return contains_anchor_; }
+ void set_contains_anchor() { contains_anchor_ = true; }
+ int captures_started() { return captures_ == NULL ? 0 : captures_->length(); }
+ int position() { return next_pos_ - 1; }
+ bool failed() { return failed_; }
+
+ static const int kMaxCaptures = 1 << 16;
+ static const uc32 kEndMarker = (1 << 21);
+
+ private:
+ enum SubexpressionType {
+ INITIAL,
+ CAPTURE, // All positive values represent captures.
+ POSITIVE_LOOKAHEAD,
+ NEGATIVE_LOOKAHEAD,
+ GROUPING
+ };
+
+ class RegExpParserState : public ZoneObject {
+ public:
+ RegExpParserState(RegExpParserState* previous_state,
+ SubexpressionType group_type,
+ int disjunction_capture_index)
+ : previous_state_(previous_state),
+ builder_(new RegExpBuilder()),
+ group_type_(group_type),
+ disjunction_capture_index_(disjunction_capture_index) {}
+ // Parser state of containing expression, if any.
+ RegExpParserState* previous_state() { return previous_state_; }
+ bool IsSubexpression() { return previous_state_ != NULL; }
+ // RegExpBuilder building this regexp's AST.
+ RegExpBuilder* builder() { return builder_; }
+ // Type of regexp being parsed (parenthesized group or entire regexp).
+ SubexpressionType group_type() { return group_type_; }
+ // Index in captures array of first capture in this sub-expression, if any.
+ // Also the capture index of this sub-expression itself, if group_type
+ // is CAPTURE.
+ int capture_index() { return disjunction_capture_index_; }
+
+ private:
+ // Linked list implementation of stack of states.
+ RegExpParserState* previous_state_;
+ // Builder for the stored disjunction.
+ RegExpBuilder* builder_;
+ // Stored disjunction type (capture, look-ahead or grouping), if any.
+ SubexpressionType group_type_;
+ // Stored disjunction's capture index (if any).
+ int disjunction_capture_index_;
+ };
+
+ uc32 current() { return current_; }
+ bool has_more() { return has_more_; }
+ bool has_next() { return next_pos_ < in()->length(); }
+ uc32 Next();
+ FlatStringReader* in() { return in_; }
+ void ScanForCaptures();
+ uc32 current_;
+ bool has_more_;
+ bool multiline_;
+ int next_pos_;
+ FlatStringReader* in_;
+ Handle<String>* error_;
+ bool simple_;
+ bool contains_anchor_;
+ ZoneList<RegExpCapture*>* captures_;
+ bool is_scanned_for_captures_;
+ // The capture count is only valid after we have scanned for captures.
+ int capture_count_;
+ bool failed_;
+};
+
+
+class Parser {
+ public:
+ Parser(Handle<Script> script, bool allow_natives_syntax,
+ v8::Extension* extension, ParserMode is_pre_parsing,
+ ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data);
+ virtual ~Parser() { }
// Pre-parse the program from the character stream; returns true on
// success, false if a stack-overflow happened during parsing.
@@ -409,11 +639,6 @@
Expression* NewThrowError(Handle<String> constructor,
Handle<String> type,
Vector< Handle<Object> > arguments);
-
- friend class Target;
- friend class TargetScope;
- friend class LexicalScope;
- friend class TemporaryScope;
};
=======================================
--- /branches/bleeding_edge/test/cctest/test-regexp.cc Fri Oct 1 08:32:32 2010
+++ /branches/bleeding_edge/test/cctest/test-regexp.cc Wed Oct 27 05:33:48 2010
@@ -64,7 +64,7 @@
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpCompileData result;
- return v8::internal::Parser::ParseRegExp(&reader, false, &result);
+ return v8::internal::RegExpParser::ParseRegExp(&reader, false, &result);
}
@@ -74,7 +74,7 @@
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpCompileData result;
- CHECK(v8::internal::Parser::ParseRegExp(&reader, false, &result));
+ CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
SmartPointer<const char> output = result.tree->ToString();
@@ -88,7 +88,7 @@
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpCompileData result;
- CHECK(v8::internal::Parser::ParseRegExp(&reader, false, &result));
+ CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
return result.simple;
@@ -106,7 +106,7 @@
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpCompileData result;
- CHECK(v8::internal::Parser::ParseRegExp(&reader, false, &result));
+ CHECK(v8::internal::RegExpParser::ParseRegExp(&reader, false, &result));
CHECK(result.tree != NULL);
CHECK(result.error.is_null());
int min_match = result.tree->min_match();
@@ -365,7 +365,7 @@
ZoneScope zone_scope(DELETE_ON_EXIT);
FlatStringReader reader(CStrVector(input));
RegExpCompileData result;
- CHECK_EQ(false, v8::internal::Parser::ParseRegExp(&reader, false, &result));
+ CHECK(!v8::internal::RegExpParser::ParseRegExp(&reader, false, &result));
CHECK(result.tree == NULL);
CHECK(!result.error.is_null());
SmartPointer<char> str = result.error->ToCString(ALLOW_NULLS);
@@ -473,7 +473,8 @@
V8::Initialize(NULL);
FlatStringReader reader(CStrVector(input));
RegExpCompileData compile_data;
- if (!v8::internal::Parser::ParseRegExp(&reader, multiline, &compile_data))
+ if (!v8::internal::RegExpParser::ParseRegExp(&reader, multiline,
+ &compile_data))
return NULL;
Handle<String> pattern = Factory::NewStringFromUtf8(CStrVector(input));
RegExpEngine::Compile(&compile_data, false, multiline, pattern, is_ascii);
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev