Revision: 5750
Author: [email protected]
Date: Tue Nov 2 00:21:37 2010
Log: Stand-alone parser template.
Uses existing Scanner and ParserLog.
Generates same preparse-data as existing preparser.
Review URL: http://codereview.chromium.org/4112012
http://code.google.com/p/v8/source/detail?r=5750
Added:
/branches/bleeding_edge/src/preparser.h
Modified:
/branches/bleeding_edge/src/parser.cc
/branches/bleeding_edge/src/parser.h
/branches/bleeding_edge/test/cctest/test-parsing.cc
=======================================
--- /dev/null
+++ /branches/bleeding_edge/src/preparser.h Tue Nov 2 00:21:37 2010
@@ -0,0 +1,1428 @@
+// Copyright 2010 the V8 project authors. All rights reserved.
+// Redistribution and use in source and binary forms, with or without
+// modification, are permitted provided that the following conditions are
+// met:
+//
+// * Redistributions of source code must retain the above copyright
+// notice, this list of conditions and the following disclaimer.
+// * Redistributions in binary form must reproduce the above
+// copyright notice, this list of conditions and the following
+// disclaimer in the documentation and/or other materials provided
+// with the distribution.
+// * Neither the name of Google Inc. nor the names of its
+// contributors may be used to endorse or promote products derived
+// from this software without specific prior written permission.
+//
+// THIS SOFTWARE IS PROVIDED BY THE COPYRIGHT HOLDERS AND CONTRIBUTORS
+// "AS IS" AND ANY EXPRESS OR IMPLIED WARRANTIES, INCLUDING, BUT NOT
+// LIMITED TO, THE IMPLIED WARRANTIES OF MERCHANTABILITY AND FITNESS FOR
+// A PARTICULAR PURPOSE ARE DISCLAIMED. IN NO EVENT SHALL THE COPYRIGHT
+// OWNER OR CONTRIBUTORS BE LIABLE FOR ANY DIRECT, INDIRECT, INCIDENTAL,
+// SPECIAL, EXEMPLARY, OR CONSEQUENTIAL DAMAGES (INCLUDING, BUT NOT
+// LIMITED TO, PROCUREMENT OF SUBSTITUTE GOODS OR SERVICES; LOSS OF USE,
+// DATA, OR PROFITS; OR BUSINESS INTERRUPTION) HOWEVER CAUSED AND ON ANY
+// THEORY OF LIABILITY, WHETHER IN CONTRACT, STRICT LIABILITY, OR TORT
+// (INCLUDING NEGLIGENCE OR OTHERWISE) ARISING IN ANY WAY OUT OF THE USE
+// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
+
+#ifndef V8_PREPARSER_H
+#define V8_PREPARSER_H
+
+#include "unicode.h"
+
+namespace v8 {
+namespace internal {
+namespace preparser {
+
+// Preparsing checks a JavaScript program and emits preparse-data that helps
+// a later parsing to be faster.
+// See preparser-data.h for the data.
+
+// The PreParser checks that the syntax follows the grammar for JavaScript,
+// and collects some information about the program along the way.
+// The grammar check is only performed in order to understand the program
+// sufficiently to deduce some information about it, that can be used
+// to speed up later parsing. Finding errors is not the goal of pre-parsing,
+// rather it is to speed up properly written and correct programs.
+// That means that contextual checks (like a label being declared where
+// it is used) are generally omitted.
+
+// Result tags returned by the Parse* functions below. Each Parse* function
+// returns one of these small integer tags.
+
+// Tag for a parsed statement; statement kinds are not distinguished.
+enum StatementType {
+ kUnknownStatement
+};
+
+// Tag for a parsed expression. Only the shapes the pre-parser reacts to are
+// told apart; anything else is kUnknownExpression.
+enum ExpressionType {
+ kUnknownExpression,
+ kIdentifierExpression, // Used to detect labels.
+ // Presumably the tag for a bare 'this' (its producer is not in this part of
+ // the file - TODO confirm).
+ kThisExpression,
+ // A property access ('.name' or '[expr]') applied to a kThisExpression.
+ // A plain '=' assignment to such an expression bumps the scope's
+ // expected-property counter.
+ kThisPropertyExpression
+};
+
+// Tag for a parsed identifier; identifier kinds are not distinguished.
+enum IdentifierType {
+ kUnknownIdentifier
+};
+
+// Tag for a parsed list of source elements.
+enum SourceElementTypes {
+ kUnknownSourceElements
+};
+
+
+// All Parse* result types are plain ints. A Parse* function returns one of
+// the enum values above, or -1 when it bails out through the CHECK_OK macro
+// after a failed sub-parse.
+typedef int SourceElements;
+typedef int Expression;
+typedef int Statement;
+typedef int Identifier;
+typedef int Arguments;
+
+
+// Recursive-descent syntax checker for JavaScript source. It pulls tokens
+// from a Scanner and reports to a PreParserLog.
+//
+// Requirements on the template parameters, as far as this class uses them:
+//   Scanner      - peek(), Next(), location(), current_token(),
+//                  stack_overflow(), has_line_terminator_before_next().
+//   PreParserLog - LogMessage(start_pos, end_pos, type, name_opt).
+template <typename Scanner, typename PreParserLog>
+class PreParser {
+ public:
+  // scanner_ and log_ are supplied by PreParseProgram; until then they are
+  // NULL rather than indeterminate.
+  PreParser()
+      : scanner_(NULL), log_(NULL), scope_(NULL), allow_lazy_(true) { }
+  ~PreParser() { }
+
+  // Pre-parse the program from the character stream; returns true on
+  // success (even if parsing failed, the pre-parse data successfully
+  // captured the syntax error), and false if a stack-overflow happened
+  // during parsing.
+  bool PreParseProgram(Scanner* scanner,
+                       PreParserLog* log,
+                       bool allow_lazy) {
+    allow_lazy_ = allow_lazy;
+    scanner_ = scanner;
+    log_ = log;
+    // Registers itself in scope_ for the duration of this call and restores
+    // the previous value when it goes out of scope.
+    Scope top_scope(&scope_, kTopLevelScope);
+    bool ok = true;
+    ParseSourceElements(Token::EOS, &ok);
+    bool stack_overflow = scanner_->stack_overflow();
+    if (!ok && !stack_overflow) {
+      ReportUnexpectedToken(scanner_->current_token());
+    }
+    return !stack_overflow;
+  }
+
+ private:
+  enum ScopeType {
+    kTopLevelScope,
+    kFunctionScope
+  };
+
+  // Per-scope bookkeeping. Constructing a Scope makes it the current scope
+  // (by writing itself through |variable|); destroying it reinstates the
+  // scope that was current before.
+  class Scope {
+   public:
+    Scope(Scope** variable, ScopeType type)
+        : variable_(variable),
+          prev_(*variable),
+          type_(type),
+          materialized_literal_count_(0),
+          expected_properties_(0),
+          with_nesting_count_(0) {
+      *variable = this;
+    }
+    ~Scope() { *variable_ = prev_; }
+    void NextMaterializedLiteralIndex() { materialized_literal_count_++; }
+    void AddProperty() { expected_properties_++; }
+    ScopeType type() { return type_; }
+    int expected_properties() { return expected_properties_; }
+    int materialized_literal_count() { return materialized_literal_count_; }
+    bool IsInsideWith() { return with_nesting_count_ != 0; }
+    void EnterWith() { with_nesting_count_++; }
+    void LeaveWith() { with_nesting_count_--; }
+
+   private:
+    Scope** const variable_;  // Where the current-scope pointer lives.
+    Scope* const prev_;       // Scope to reinstate on destruction.
+    const ScopeType type_;
+    int materialized_literal_count_;
+    int expected_properties_;
+    int with_nesting_count_;
+  };
+
+  // Types that allow us to recognize simple this-property assignments.
+  // A simple this-property assignment is a statement on the form
+  // "this.propertyName = (primitive constant or function parameter name);"
+  // where propertyName isn't "__proto__".
+  // The result is only relevant if the function body contains only
+  // simple this-property assignments.
+
+  // Report syntax error
+  void ReportUnexpectedToken(Token::Value token);
+  void ReportMessageAt(int start_pos,
+                       int end_pos,
+                       const char* type,
+                       const char* name_opt) {
+    log_->LogMessage(start_pos, end_pos, type, name_opt);
+  }
+
+  // All ParseXXX functions take as the last argument an *ok parameter
+  // which is set to false if parsing failed; it is unchanged otherwise.
+  // By making the 'exception handling' explicit, we are forced to check
+  // for failure at the call sites.
+  SourceElements ParseSourceElements(int end_token, bool* ok);
+  Statement ParseStatement(bool* ok);
+  Statement ParseFunctionDeclaration(bool* ok);
+  Statement ParseNativeDeclaration(bool* ok);
+  Statement ParseBlock(bool* ok);
+  Statement ParseVariableStatement(bool* ok);
+  Statement ParseVariableDeclarations(bool accept_IN,
+                                      int* num_decl,
+                                      bool* ok);
+  Statement ParseExpressionOrLabelledStatement(bool* ok);
+  Statement ParseIfStatement(bool* ok);
+  Statement ParseContinueStatement(bool* ok);
+  Statement ParseBreakStatement(bool* ok);
+  Statement ParseReturnStatement(bool* ok);
+  Statement ParseWithStatement(bool* ok);
+  Statement ParseSwitchStatement(bool* ok);
+  Statement ParseDoWhileStatement(bool* ok);
+  Statement ParseWhileStatement(bool* ok);
+  Statement ParseForStatement(bool* ok);
+  Statement ParseThrowStatement(bool* ok);
+  Statement ParseTryStatement(bool* ok);
+  Statement ParseDebuggerStatement(bool* ok);
+
+  Expression ParseExpression(bool accept_IN, bool* ok);
+  Expression ParseAssignmentExpression(bool accept_IN, bool* ok);
+  Expression ParseConditionalExpression(bool accept_IN, bool* ok);
+  Expression ParseBinaryExpression(int prec, bool accept_IN, bool* ok);
+  Expression ParseUnaryExpression(bool* ok);
+  Expression ParsePostfixExpression(bool* ok);
+  Expression ParseLeftHandSideExpression(bool* ok);
+  Expression ParseNewExpression(bool* ok);
+  Expression ParseMemberExpression(bool* ok);
+  Expression ParseNewPrefix(int* new_count, bool* ok);
+  Expression ParseMemberWithNewPrefixesExpression(int* new_count, bool* ok);
+  Expression ParsePrimaryExpression(bool* ok);
+  Expression ParseArrayLiteral(bool* ok);
+  Expression ParseObjectLiteral(bool* ok);
+  Expression ParseRegExpLiteral(bool seen_equal, bool* ok);
+  Expression ParseV8Intrinsic(bool* ok);
+
+  Arguments ParseArguments(bool* ok);
+  Expression ParseFunctionLiteral(bool* ok);
+
+  Identifier ParseIdentifier(bool* ok);
+  Identifier ParseIdentifierName(bool* ok);
+  Identifier ParseIdentifierOrGetOrSet(bool* is_get, bool* is_set, bool* ok);
+
+  Identifier GetIdentifierSymbol();
+  unsigned int HexDigitValue(char digit);
+  Expression GetStringSymbol();
+
+
+  // Thin wrappers around the scanner's one-token lookahead.
+  Token::Value peek() { return scanner_->peek(); }
+  Token::Value Next() {
+    Token::Value next = scanner_->Next();
+    return next;
+  }
+
+  // Advances past the next token. |token| only documents what the caller
+  // expects to be consuming; it is not checked.
+  void Consume(Token::Value token) {
+    Next();
+  }
+
+  // Advances past the next token and flags a failure in *ok if it was not
+  // |token|.
+  void Expect(Token::Value token, bool* ok) {
+    if (Next() != token) {
+      *ok = false;
+    }
+  }
+
+  // Consumes the next token only if it is |token|; returns whether it did.
+  bool Check(Token::Value token) {
+    Token::Value next = peek();
+    if (next == token) {
+      Consume(next);
+      return true;
+    }
+    return false;
+  }
+  void ExpectSemicolon(bool* ok);
+
+  static int Precedence(Token::Value tok, bool accept_IN);
+
+  Scanner* scanner_;   // Set by PreParseProgram.
+  PreParserLog* log_;  // Set by PreParseProgram.
+  Scope* scope_;       // Innermost live Scope, maintained by Scope itself.
+  bool allow_lazy_;
+};
+
+
+// CHECK_OK is written in place of the trailing 'ok' argument of a Parse*
+// or Expect call:
+//
+//   Foo(arg, CHECK_OK);
+//
+// expands to
+//
+//   Foo(arg, ok); if (!*ok) return -1; ((void)0);
+//
+// so the enclosing function returns -1 as soon as the callee has cleared
+// *ok. Because the expansion is several statements, a call using CHECK_OK
+// must not be the unbraced body of an if/else/loop. The enclosing function
+// must have a 'bool* ok' in scope and a return type that accepts -1.
+#define CHECK_OK ok); \
+ if (!*ok) return -1; \
+ ((void)0
+#define DUMMY ) // to make indentation work
+#undef DUMMY
+
+
+// Logs an "unexpected token" message for |token|, positioned at the
+// scanner's current location.
+template <typename Scanner, typename Log>
+void PreParser<Scanner, Log>::ReportUnexpectedToken(Token::Value token) {
+  // Stack overflows are deliberately not logged from here, to avoid growing
+  // the stack any further. PreParseProgram signals the overflow through its
+  // return value once parsing has unwound.
+  const bool overflowed =
+      (token == Token::ILLEGAL) && scanner_->stack_overflow();
+  if (overflowed) return;
+
+  typename Scanner::Location where = scanner_->location();
+
+  // Four token kinds have a dedicated message type and no name argument.
+  // Every other token gets the generic message with its string as argument.
+  const char* message_type = "unexpected_token";
+  const char* token_name = NULL;
+  switch (token) {
+    case Token::EOS:
+      message_type = "unexpected_eos";
+      break;
+    case Token::NUMBER:
+      message_type = "unexpected_token_number";
+      break;
+    case Token::STRING:
+      message_type = "unexpected_token_string";
+      break;
+    case Token::IDENTIFIER:
+      message_type = "unexpected_token_identifier";
+      break;
+    default:
+      token_name = Token::String(token);
+      break;
+  }
+  ReportMessageAt(where.beg_pos, where.end_pos, message_type, token_name);
+}
+
+
+// Parses statements until |end_token| is the next token. The end token
+// itself is left unconsumed. Returns -1 (through CHECK_OK) as soon as a
+// statement fails to parse.
+template <typename Scanner, typename Log>
+SourceElements PreParser<Scanner, Log>::ParseSourceElements(int end_token,
+                                                            bool* ok) {
+  // SourceElements ::
+  //   (Statement)* <end_token>
+
+  for (Token::Value upcoming = peek();
+       upcoming != end_token;
+       upcoming = peek()) {
+    ParseStatement(CHECK_OK);
+  }
+  return kUnknownSourceElements;
+}
+
+
+// Parses one statement by looking at the next token and delegating to the
+// matching Parse*Statement function. Anything that does not start with a
+// statement keyword, '{' or ';' is handed to
+// ParseExpressionOrLabelledStatement. Failure is reported through *ok by
+// the delegate.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseStatement(bool* ok) {
+ // Statement ::
+ // Block
+ // VariableStatement
+ // EmptyStatement
+ // ExpressionStatement
+ // IfStatement
+ // IterationStatement
+ // ContinueStatement
+ // BreakStatement
+ // ReturnStatement
+ // WithStatement
+ // LabelledStatement
+ // SwitchStatement
+ // ThrowStatement
+ // TryStatement
+ // DebuggerStatement
+
+ // Note: Since labels can only be used by 'break' and 'continue'
+ // statements, which themselves are only valid within blocks,
+ // iterations or 'switch' statements (i.e., BreakableStatements),
+ // labels can be simply ignored in all other cases; except for
+ // trivial labeled break statements 'label: break label' which is
+ // parsed into an empty statement.
+
+ // Dispatch on the first token of the statement. The token is only peeked
+ // at; each delegate consumes it itself.
+ switch (peek()) {
+ case Token::LBRACE:
+ return ParseBlock(ok);
+
+ case Token::CONST:
+ case Token::VAR:
+ return ParseVariableStatement(ok);
+
+ case Token::SEMICOLON:
+ // Empty statement: just swallow the ';'.
+ Next();
+ return kUnknownStatement;
+
+ case Token::IF:
+ return ParseIfStatement(ok);
+
+ case Token::DO:
+ return ParseDoWhileStatement(ok);
+
+ case Token::WHILE:
+ return ParseWhileStatement(ok);
+
+ case Token::FOR:
+ return ParseForStatement(ok);
+
+ case Token::CONTINUE:
+ return ParseContinueStatement(ok);
+
+ case Token::BREAK:
+ return ParseBreakStatement(ok);
+
+ case Token::RETURN:
+ return ParseReturnStatement(ok);
+
+ case Token::WITH:
+ return ParseWithStatement(ok);
+
+ case Token::SWITCH:
+ return ParseSwitchStatement(ok);
+
+ case Token::THROW:
+ return ParseThrowStatement(ok);
+
+ case Token::TRY:
+ return ParseTryStatement(ok);
+
+ case Token::FUNCTION:
+ return ParseFunctionDeclaration(ok);
+
+ case Token::NATIVE:
+ return ParseNativeDeclaration(ok);
+
+ case Token::DEBUGGER:
+ return ParseDebuggerStatement(ok);
+
+ default:
+ return ParseExpressionOrLabelledStatement(ok);
+ }
+}
+
+
+// Parses a function declaration: the 'function' keyword and the function's
+// name are consumed here; everything after the name is delegated to
+// ParseFunctionLiteral. Returns -1 (through CHECK_OK) on failure.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseFunctionDeclaration(bool* ok) {
+ // FunctionDeclaration ::
+ // 'function' Identifier '(' FormalParameterListopt ')'
+ // '{' FunctionBody '}'
+ Expect(Token::FUNCTION, CHECK_OK);
+ ParseIdentifier(CHECK_OK);
+ ParseFunctionLiteral(CHECK_OK);
+ return kUnknownStatement;
+}
+
+
+// Language extension which is only enabled for source files loaded
+// through the API's extension mechanism. A native function
+// declaration is resolved by looking up the function through a
+// callback provided by the extension.
+//
+// Accepted form:
+//   'native' 'function' Identifier '(' (Identifier (',' Identifier)*)? ')' ';'
+// The terminating ';' is mandatory here (Expect, not ExpectSemicolon).
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseNativeDeclaration(bool* ok) {
+  Expect(Token::NATIVE, CHECK_OK);
+  Expect(Token::FUNCTION, CHECK_OK);
+  ParseIdentifier(CHECK_OK);
+  Expect(Token::LPAREN, CHECK_OK);
+
+  // Comma-separated parameter names, possibly none.
+  if (peek() != Token::RPAREN) {
+    for (;;) {
+      ParseIdentifier(CHECK_OK);
+      if (peek() == Token::RPAREN) break;
+      Expect(Token::COMMA, CHECK_OK);
+    }
+  }
+
+  Expect(Token::RPAREN, CHECK_OK);
+  Expect(Token::SEMICOLON, CHECK_OK);
+  return kUnknownStatement;
+}
+
+
+// Parses a brace-delimited list of statements. Returns -1 (through
+// CHECK_OK) as soon as a brace or a contained statement fails to parse.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseBlock(bool* ok) {
+  // Block ::
+  //   '{' Statement* '}'
+
+  // Note that a Block does not introduce a new execution scope!
+  // (ECMA-262, 3rd, 12.2)
+  Expect(Token::LBRACE, CHECK_OK);
+  for (Token::Value upcoming = peek();
+       upcoming != Token::RBRACE;
+       upcoming = peek()) {
+    ParseStatement(CHECK_OK);
+  }
+  Expect(Token::RBRACE, CHECK_OK);
+  return kUnknownStatement;
+}
+
+
+// Parses a 'var'/'const' declaration list used as a statement, followed by
+// its terminating semicolon (as accepted by ExpectSemicolon). Returns the
+// result of ParseVariableDeclarations, or -1 (through CHECK_OK) on failure.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseVariableStatement(bool* ok) {
+ // VariableStatement ::
+ // VariableDeclarations ';'
+
+ // accept_IN is true: 'in' may appear in initializers here. The declaration
+ // count is not needed, hence the NULL.
+ Statement result = ParseVariableDeclarations(true, NULL, CHECK_OK);
+ ExpectSemicolon(CHECK_OK);
+ return result;
+}
+
+
+// Parses 'var' or 'const' followed by a comma-separated list of names, each
+// with an optional '=' initializer.
+//
+//   accept_IN - forwarded to the initializer parser; the 'for' statement
+//               parser passes false here.
+//   num_decl  - if non-NULL, receives the number of names declared. It is
+//               written only after the whole list parsed successfully.
+//   ok        - cleared if the next token is neither 'var' nor 'const', or
+//               if a name or initializer fails to parse.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseVariableDeclarations(bool accept_IN,
+                                                             int* num_decl,
+                                                             bool* ok) {
+  // VariableDeclarations ::
+  //   ('var' | 'const') (Identifier ('=' AssignmentExpression)?)+[',']
+
+  if (peek() == Token::VAR) {
+    Consume(Token::VAR);
+  } else {
+    if (peek() != Token::CONST) {
+      *ok = false;
+      return 0;
+    }
+    Consume(Token::CONST);
+  }
+
+  // The scope of a variable/const declared anywhere inside a function
+  // is the entire function (ECMA-262, 3rd, 10.1.3, and 12.2).
+  int declared = 0;  // Number of names seen so far.
+  for (;;) {
+    ParseIdentifier(CHECK_OK);
+    declared++;
+    if (peek() == Token::ASSIGN) {
+      Expect(Token::ASSIGN, CHECK_OK);
+      ParseAssignmentExpression(accept_IN, CHECK_OK);
+    }
+    // A comma announces another declaration; anything else ends the list.
+    if (peek() != Token::COMMA) break;
+    Consume(Token::COMMA);
+  }
+
+  if (num_decl != NULL) *num_decl = declared;
+  return kUnknownStatement;
+}
+
+
+// Parses either an expression statement or a labelled statement. The two
+// are told apart after the fact: if the expression was a lone identifier
+// and a ':' follows, the identifier was a label and a statement follows.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseExpressionOrLabelledStatement(
+    bool* ok) {
+  // ExpressionStatement | LabelledStatement ::
+  //   Expression ';'
+  //   Identifier ':' Statement
+
+  Expression parsed = ParseExpression(true, CHECK_OK);
+  const bool is_label =
+      (peek() == Token::COLON) && (parsed == kIdentifierExpression);
+  if (!is_label) {
+    // Plain expression statement.
+    ExpectSemicolon(CHECK_OK);
+    return kUnknownStatement;
+  }
+  Consume(Token::COLON);
+  return ParseStatement(ok);
+}
+
+
+// Parses an 'if' statement with an optional 'else' branch. Returns -1
+// (through CHECK_OK) if any part fails to parse.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseIfStatement(bool* ok) {
+  // IfStatement ::
+  //   'if' '(' Expression ')' Statement ('else' Statement)?
+
+  Expect(Token::IF, CHECK_OK);
+  Expect(Token::LPAREN, CHECK_OK);
+  ParseExpression(true, CHECK_OK);
+  Expect(Token::RPAREN, CHECK_OK);
+  ParseStatement(CHECK_OK);
+  // Check() swallows the 'else' keyword when it is present.
+  if (Check(Token::ELSE)) {
+    ParseStatement(CHECK_OK);
+  }
+  return kUnknownStatement;
+}
+
+
+// Parses a 'continue' statement with an optional target label. The label
+// is parsed but not checked against any enclosing statement.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseContinueStatement(bool* ok) {
+  // ContinueStatement ::
+  //   'continue' [no line terminator] Identifier? ';'
+
+  Expect(Token::CONTINUE, CHECK_OK);
+  const Token::Value following = peek();
+  // A label is only looked for on the same line, and only if the statement
+  // does not visibly end right here.
+  const bool has_label =
+      !scanner_->has_line_terminator_before_next() &&
+      following != Token::SEMICOLON &&
+      following != Token::RBRACE &&
+      following != Token::EOS;
+  if (has_label) {
+    ParseIdentifier(CHECK_OK);
+  }
+  ExpectSemicolon(CHECK_OK);
+  return kUnknownStatement;
+}
+
+
+// Parses a 'break' statement with an optional target label. The label is
+// parsed but not checked against any enclosing statement.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseBreakStatement(bool* ok) {
+  // BreakStatement ::
+  //   'break' [no line terminator] Identifier? ';'
+
+  Expect(Token::BREAK, CHECK_OK);
+  const Token::Value following = peek();
+  // The statement ends without a label if a newline intervenes or the next
+  // token already closes the statement, the block or the input.
+  const bool ends_here =
+      scanner_->has_line_terminator_before_next() ||
+      following == Token::SEMICOLON ||
+      following == Token::RBRACE ||
+      following == Token::EOS;
+  if (!ends_here) {
+    ParseIdentifier(CHECK_OK);
+  }
+  ExpectSemicolon(CHECK_OK);
+  return kUnknownStatement;
+}
+
+
+// Parses a 'return' statement with an optional result expression.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseReturnStatement(bool* ok) {
+  // ReturnStatement ::
+  //   'return' [no line terminator] Expression? ';'
+
+  // Consume the return token. It is necessary to do that before
+  // reporting any errors on it, because of the way errors are
+  // reported (underlining).
+  Expect(Token::RETURN, CHECK_OK);
+
+  // An ECMAScript program is considered syntactically incorrect if it
+  // contains a return statement that is not within the body of a
+  // function. See ECMA-262, section 12.9, page 67.
+  // This is not handled during preparsing.
+
+  const Token::Value following = peek();
+  // No result expression if a newline intervenes or the next token already
+  // closes the statement, the block or the input.
+  const bool bare_return =
+      scanner_->has_line_terminator_before_next() ||
+      following == Token::SEMICOLON ||
+      following == Token::RBRACE ||
+      following == Token::EOS;
+  if (!bare_return) {
+    ParseExpression(true, CHECK_OK);
+  }
+  ExpectSemicolon(CHECK_OK);
+  return kUnknownStatement;
+}
+
+
+// Parses a 'with' statement. While the body is being parsed the current
+// scope's with-nesting counter is raised, so Scope::IsInsideWith() reports
+// true for code inside the body.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseWithStatement(bool* ok) {
+ // WithStatement ::
+ // 'with' '(' Expression ')' Statement
+ Expect(Token::WITH, CHECK_OK);
+ Expect(Token::LPAREN, CHECK_OK);
+ ParseExpression(true, CHECK_OK);
+ Expect(Token::RPAREN, CHECK_OK);
+
+ scope_->EnterWith();
+ // NOTE(review): if the body fails to parse, CHECK_OK returns before
+ // LeaveWith() runs and the counter stays raised. This looks harmless as
+ // long as parsing is abandoned after a failure - confirm.
+ ParseStatement(CHECK_OK);
+ scope_->LeaveWith();
+ return kUnknownStatement;
+}
+
+
+// Parses a 'switch' statement. Inside the braces, 'case' labels, 'default'
+// labels and statements are accepted in any order and any number; no
+// check is made that statements follow a label or that 'default' is unique.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseSwitchStatement(bool* ok) {
+  // SwitchStatement ::
+  //   'switch' '(' Expression ')' '{' CaseClause* '}'
+
+  Expect(Token::SWITCH, CHECK_OK);
+  Expect(Token::LPAREN, CHECK_OK);
+  ParseExpression(true, CHECK_OK);
+  Expect(Token::RPAREN, CHECK_OK);
+
+  Expect(Token::LBRACE, CHECK_OK);
+  for (Token::Value upcoming = peek();
+       upcoming != Token::RBRACE;
+       upcoming = peek()) {
+    switch (upcoming) {
+      case Token::CASE:
+        Expect(Token::CASE, CHECK_OK);
+        ParseExpression(true, CHECK_OK);
+        Expect(Token::COLON, CHECK_OK);
+        break;
+      case Token::DEFAULT:
+        Expect(Token::DEFAULT, CHECK_OK);
+        Expect(Token::COLON, CHECK_OK);
+        break;
+      default:
+        ParseStatement(CHECK_OK);
+        break;
+    }
+  }
+  Expect(Token::RBRACE, CHECK_OK);
+
+  return kUnknownStatement;
+}
+
+
+// Parses a 'do ... while (...)' statement, including an optional trailing
+// semicolon. Returns -1 (through CHECK_OK) if any part fails to parse.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseDoWhileStatement(bool* ok) {
+  // DoStatement ::
+  //   'do' Statement 'while' '(' Expression ')' ';'
+
+  Expect(Token::DO, CHECK_OK);
+  ParseStatement(CHECK_OK);
+  Expect(Token::WHILE, CHECK_OK);
+  Expect(Token::LPAREN, CHECK_OK);
+  ParseExpression(true, CHECK_OK);
+  Expect(Token::RPAREN, CHECK_OK);
+
+  // The terminating ';' belongs to this statement, so consume it when it is
+  // there. Otherwise it would be read as a separate empty statement, and
+  // input such as "if (a) do x(); while (b); else y();" would be rejected
+  // at the 'else'. It is taken only if present, not through
+  // ExpectSemicolon(), so a do-while written without any ';' is still
+  // accepted.
+  if (peek() == Token::SEMICOLON) {
+    Consume(Token::SEMICOLON);
+  }
+  return kUnknownStatement;
+}
+
+
+// Parses a 'while' loop: keyword, parenthesized condition, then the body
+// statement. Returns -1 (through CHECK_OK) if any part fails to parse.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseWhileStatement(bool* ok) {
+ // WhileStatement ::
+ // 'while' '(' Expression ')' Statement
+
+ Expect(Token::WHILE, CHECK_OK);
+ Expect(Token::LPAREN, CHECK_OK);
+ ParseExpression(true, CHECK_OK);
+ Expect(Token::RPAREN, CHECK_OK);
+ ParseStatement(CHECK_OK);
+ return kUnknownStatement;
+}
+
+
+// Parses both loop forms that start with 'for':
+//   for (init; cond; next) Statement
+//   for (lhs in Expression) Statement
+// The initializer is parsed with 'in' disabled as a binary operator, so that
+// an 'in' directly after it can be recognized as the for-in keyword. After
+// a 'var'/'const' initializer, for-in is only accepted if exactly one name
+// was declared.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseForStatement(bool* ok) {
+  // ForStatement ::
+  //   'for' '(' Expression? ';' Expression? ';' Expression? ')' Statement
+
+  Expect(Token::FOR, CHECK_OK);
+  Expect(Token::LPAREN, CHECK_OK);
+  if (peek() != Token::SEMICOLON) {
+    bool may_be_for_in = true;
+    if (peek() == Token::VAR || peek() == Token::CONST) {
+      int declared;
+      ParseVariableDeclarations(false, &declared, CHECK_OK);
+      may_be_for_in = (declared == 1);
+    } else {
+      ParseExpression(false, CHECK_OK);
+    }
+    if (peek() == Token::IN && may_be_for_in) {
+      // for-in: enumerated expression, ')', body.
+      Expect(Token::IN, CHECK_OK);
+      ParseExpression(true, CHECK_OK);
+      Expect(Token::RPAREN, CHECK_OK);
+
+      ParseStatement(CHECK_OK);
+      return kUnknownStatement;
+    }
+  }
+
+  // Parsed initializer at this point; the classic three-clause form follows.
+  Expect(Token::SEMICOLON, CHECK_OK);
+
+  // Optional loop condition.
+  if (peek() != Token::SEMICOLON) {
+    ParseExpression(true, CHECK_OK);
+  }
+  Expect(Token::SEMICOLON, CHECK_OK);
+
+  // Optional per-iteration expression.
+  if (peek() != Token::RPAREN) {
+    ParseExpression(true, CHECK_OK);
+  }
+  Expect(Token::RPAREN, CHECK_OK);
+
+  ParseStatement(CHECK_OK);
+  return kUnknownStatement;
+}
+
+
+// Parses a 'throw' statement. A line terminator between 'throw' and its
+// expression is an error: it is logged as "newline_after_throw" at the
+// scanner's current location and *ok is cleared.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseThrowStatement(bool* ok) {
+  // ThrowStatement ::
+  //   'throw' [no line terminator] Expression ';'
+
+  Expect(Token::THROW, CHECK_OK);
+  if (scanner_->has_line_terminator_before_next()) {
+    typename Scanner::Location pos = scanner_->location();
+    ReportMessageAt(pos.beg_pos, pos.end_pos,
+                    "newline_after_throw", NULL);
+    *ok = false;
+    // Statement is an int, so return a Statement value rather than the
+    // null-pointer constant NULL. The numeric result (0) is unchanged, and
+    // callers go by *ok anyway.
+    return kUnknownStatement;
+  }
+  ParseExpression(true, CHECK_OK);
+  ExpectSemicolon(CHECK_OK);
+
+  return kUnknownStatement;
+}
+
+
+// Parses a 'try' statement: a block, then an optional 'catch' clause, then
+// an optional 'finally' clause. At least one of the two clauses must be
+// present, otherwise *ok is cleared.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseTryStatement(bool* ok) {
+  // TryStatement ::
+  //   'try' Block Catch
+  //   'try' Block Finally
+  //   'try' Block Catch Finally
+  //
+  // Catch ::
+  //   'catch' '(' Identifier ')' Block
+  //
+  // Finally ::
+  //   'finally' Block
+
+  Expect(Token::TRY, CHECK_OK);
+
+  ParseBlock(CHECK_OK);
+
+  int clauses_seen = 0;
+  if (peek() == Token::CATCH) {
+    Expect(Token::CATCH, CHECK_OK);
+    Expect(Token::LPAREN, CHECK_OK);
+    ParseIdentifier(CHECK_OK);
+    Expect(Token::RPAREN, CHECK_OK);
+    ParseBlock(CHECK_OK);
+    clauses_seen++;
+  }
+  if (peek() == Token::FINALLY) {
+    Expect(Token::FINALLY, CHECK_OK);
+    ParseBlock(CHECK_OK);
+    clauses_seen++;
+  }
+  // A 'try' block with neither clause is a syntax error.
+  if (clauses_seen == 0) {
+    *ok = false;
+  }
+  return kUnknownStatement;
+}
+
+
+// Parses a 'debugger' statement: the keyword followed by a semicolon (as
+// accepted by ExpectSemicolon). Returns -1 (through CHECK_OK) on failure.
+template <typename Scanner, typename Log>
+Statement PreParser<Scanner, Log>::ParseDebuggerStatement(bool* ok) {
+ // In ECMA-262 'debugger' is defined as a reserved keyword. In some
+ // browser contexts this is used as a statement which invokes the debugger
+ // as if a break point is present.
+ // DebuggerStatement ::
+ // 'debugger' ';'
+
+ Expect(Token::DEBUGGER, CHECK_OK);
+ ExpectSemicolon(CHECK_OK);
+ return kUnknownStatement;
+}
+
+
+// Precedence = 1
+// Parses a comma-separated sequence of assignment expressions. A single
+// expression keeps its own result tag; as soon as a comma is involved the
+// result is kUnknownExpression.
+template <typename Scanner, typename Log>
+Expression PreParser<Scanner, Log>::ParseExpression(bool accept_IN,
+                                                    bool* ok) {
+  // Expression ::
+  //   AssignmentExpression
+  //   Expression ',' AssignmentExpression
+
+  Expression first = ParseAssignmentExpression(accept_IN, CHECK_OK);
+  if (peek() != Token::COMMA) return first;
+
+  do {
+    Expect(Token::COMMA, CHECK_OK);
+    ParseAssignmentExpression(accept_IN, CHECK_OK);
+  } while (peek() == Token::COMMA);
+  return kUnknownExpression;
+}
+
+
+// Precedence = 2
+// Parses a conditional expression, optionally followed by an assignment
+// operator and a (right-associative) assignment expression. A plain '='
+// whose left-hand side is a this-property expression is counted on the
+// current scope via AddProperty().
+template <typename Scanner, typename Log>
+Expression PreParser<Scanner, Log>::ParseAssignmentExpression(bool accept_IN,
+                                                              bool* ok) {
+  // AssignmentExpression ::
+  //   ConditionalExpression
+  //   LeftHandSideExpression AssignmentOperator AssignmentExpression
+
+  Expression target = ParseConditionalExpression(accept_IN, CHECK_OK);
+
+  if (Token::IsAssignmentOp(peek())) {
+    const Token::Value assign_op = Next();  // The assignment operator.
+    ParseAssignmentExpression(accept_IN, CHECK_OK);
+
+    const bool is_this_property_store =
+        (assign_op == Token::ASSIGN) && (target == kThisPropertyExpression);
+    if (is_this_property_store) {
+      scope_->AddProperty();
+    }
+    return kUnknownExpression;
+  }
+
+  // No assignment operator: the conditional expression is the result.
+  return target;
+}
+
+
+// Precedence = 3
+// Parses a binary expression, optionally followed by '? a : b'. Without the
+// '?' part the binary expression's result tag is passed through; with it
+// the result is kUnknownExpression.
+template <typename Scanner, typename Log>
+Expression PreParser<Scanner, Log>::ParseConditionalExpression(bool accept_IN,
+                                                               bool* ok) {
+  // ConditionalExpression ::
+  //   LogicalOrExpression
+  //   LogicalOrExpression '?' AssignmentExpression ':' AssignmentExpression
+
+  // We start using the binary expression parser for prec >= 4 only!
+  Expression condition = ParseBinaryExpression(4, accept_IN, CHECK_OK);
+  // Check() swallows the '?' when it is present.
+  if (!Check(Token::CONDITIONAL)) return condition;
+
+  // In parsing the first assignment expression in conditional
+  // expressions we always accept the 'in' keyword; see ECMA-262,
+  // section 11.12, page 58.
+  ParseAssignmentExpression(true, CHECK_OK);
+  Expect(Token::COLON, CHECK_OK);
+  ParseAssignmentExpression(accept_IN, CHECK_OK);
+  return kUnknownExpression;
+}
+
+
+// Returns the binary-operator precedence of |tok| as given by
+// Token::Precedence, except that 'in' is reported with precedence 0 when
+// |accept_IN| is false. A precedence of 0 terminates binary expression
+// parsing.
+template <typename Scanner, typename Log>
+int PreParser<Scanner, Log>::Precedence(Token::Value tok, bool accept_IN) {
+  const bool in_is_disabled = (tok == Token::IN) && !accept_IN;
+  return in_is_disabled ? 0 : Token::Precedence(tok);
+}
+
+
+// Precedence >= 4
+// Parses a chain of binary operators whose precedence is at least |prec|.
+//
+//   prec      - lowest operator precedence this call may consume.
+//   accept_IN - when false, 'in' gets precedence 0 (see Precedence) and
+//               therefore ends the expression.
+//
+// Returns the unary operand's result tag if no binary operator was
+// consumed, kUnknownExpression otherwise, or -1 (through CHECK_OK) on
+// failure.
+template <typename Scanner, typename Log>
+Expression PreParser<Scanner, Log>::ParseBinaryExpression(int prec,
+ bool accept_IN,
+ bool* ok) {
+ Expression result = ParseUnaryExpression(CHECK_OK);
+ // Start at the precedence of the operator that follows the first operand
+ // and work downwards to |prec|.
+ for (int prec1 = Precedence(peek(), accept_IN); prec1 >= prec; prec1--) {
+ // prec1 >= 4
+ // Consume every operator of exactly this precedence. The right operand
+ // is parsed with the next higher minimum precedence, so operators of
+ // equal precedence are picked up by this loop and not by the recursion.
+ while (Precedence(peek(), accept_IN) == prec1) {
+ Next();
+ ParseBinaryExpression(prec1 + 1, accept_IN, CHECK_OK);
+ result = kUnknownExpression;
+ }
+ }
+ return result;
+}
+
+
+// Parses any number of prefix operators followed by a postfix expression.
+// With at least one prefix operator the result is kUnknownExpression;
+// otherwise the postfix expression's result tag is passed through.
+// Failure is reported through *ok only.
+template <typename Scanner, typename Log>
+Expression PreParser<Scanner, Log>::ParseUnaryExpression(bool* ok) {
+  // UnaryExpression ::
+  //   PostfixExpression
+  //   'delete' UnaryExpression
+  //   'void' UnaryExpression
+  //   'typeof' UnaryExpression
+  //   '++' UnaryExpression
+  //   '--' UnaryExpression
+  //   '+' UnaryExpression
+  //   '-' UnaryExpression
+  //   '~' UnaryExpression
+  //   '!' UnaryExpression
+
+  const Token::Value upcoming = peek();
+  const bool is_prefix_op =
+      Token::IsUnaryOp(upcoming) || Token::IsCountOp(upcoming);
+  if (!is_prefix_op) {
+    return ParsePostfixExpression(ok);
+  }
+
+  Next();  // Skip the operator.
+  ParseUnaryExpression(ok);
+  return kUnknownExpression;
+}
+
+
+// Parses a left-hand-side expression with an optional trailing '++' or
+// '--'. The operator is only taken if it sits on the same line as the
+// operand. With a postfix operator the result is kUnknownExpression.
+template <typename Scanner, typename Log>
+Expression PreParser<Scanner, Log>::ParsePostfixExpression(bool* ok) {
+  // PostfixExpression ::
+  //   LeftHandSideExpression ('++' | '--')?
+
+  Expression operand = ParseLeftHandSideExpression(CHECK_OK);
+  // A line break before the next token rules out a postfix operator.
+  if (scanner_->has_line_terminator_before_next()) return operand;
+  if (!Token::IsCountOp(peek())) return operand;
+
+  Next();  // Skip the '++' or '--'.
+  return kUnknownExpression;
+}
+
+
+// Parses a 'new' or member expression followed by any mix of index
+// accesses '[...]', calls '(...)' and property accesses '.name'.
+//
+// Result tag: a property or index access applied directly to a
+// kThisExpression yields kThisPropertyExpression; any other suffix yields
+// kUnknownExpression. Without suffixes the head's tag is passed through.
+template <typename Scanner, typename Log>
+Expression PreParser<Scanner, Log>::ParseLeftHandSideExpression(bool* ok) {
+  // LeftHandSideExpression ::
+  //   (NewExpression | MemberExpression) ...
+
+  Expression result = kUnknownExpression;
+  if (peek() == Token::NEW) {
+    result = ParseNewExpression(CHECK_OK);
+  } else {
+    result = ParseMemberExpression(CHECK_OK);
+  }
+
+  for (;;) {
+    switch (peek()) {
+      case Token::LBRACK: {
+        Consume(Token::LBRACK);
+        ParseExpression(true, CHECK_OK);
+        Expect(Token::RBRACK, CHECK_OK);
+        result = (result == kThisExpression) ? kThisPropertyExpression
+                                             : kUnknownExpression;
+        break;
+      }
+
+      case Token::LPAREN: {
+        ParseArguments(CHECK_OK);
+        result = kUnknownExpression;
+        break;
+      }
+
+      case Token::PERIOD: {
+        Consume(Token::PERIOD);
+        ParseIdentifierName(CHECK_OK);
+        result = (result == kThisExpression) ? kThisPropertyExpression
+                                             : kUnknownExpression;
+        break;
+      }
+
+      default:
+        // No further suffix: the expression is complete.
+        return result;
+    }
+  }
+}
+
+
+
+// Parses one 'new' keyword and whatever follows it: either another 'new'
+// prefix (recursively) or the member expression the prefixes apply to.
+//
+//   new_count - in/out counter of 'new' prefixes that have not yet been
+//               matched with an argument list. It is incremented for the
+//               'new' consumed here and, if still positive after the
+//               operand has been parsed, decremented again.
+template <typename Scanner, typename Log>
+Expression PreParser<Scanner, Log>::ParseNewPrefix(int* new_count, bool* ok) {
+  // NewExpression ::
+  //   ('new')+ MemberExpression
+
+  // The grammar for new expressions is pretty warped. The keyword
+  // 'new' can either be a part of the new expression (where it isn't
+  // followed by an argument list) or a part of the member expression,
+  // where it must be followed by an argument list. To accommodate
+  // this, we parse the 'new' keywords greedily and keep track of how
+  // many we have parsed. This information is then passed on to the
+  // member expression parser, which is only allowed to match argument
+  // lists as long as it has 'new' prefixes left
+  Expect(Token::NEW, CHECK_OK);
+  // Parenthesized on purpose: '*new_count++' would advance the pointer and
+  // leave the counter itself untouched.
+  (*new_count)++;
+
+  if (peek() == Token::NEW) {
+    ParseNewPrefix(new_count, CHECK_OK);
+  } else {
+    ParseMemberWithNewPrefixesExpression(new_count, CHECK_OK);
+  }
+
+  if (*new_count > 0) {
+    (*new_count)--;
+  }
+  return kUnknownExpression;
+}
+
+
+template <typename Scanner, typename Log>
***The diff for this file has been truncated for email.***
=======================================
--- /branches/bleeding_edge/src/parser.cc Wed Oct 27 05:33:48 2010
+++ /branches/bleeding_edge/src/parser.cc Tue Nov 2 00:21:37 2010
@@ -36,6 +36,7 @@
#include "messages.h"
#include "parser.h"
#include "platform.h"
+#include "preparser.h"
#include "runtime.h"
#include "scopeinfo.h"
#include "scopes.h"
@@ -390,27 +391,6 @@
};
-class ParserLog BASE_EMBEDDED {
- public:
- virtual ~ParserLog() { }
-
- // Records the occurrence of a function.
- virtual FunctionEntry LogFunction(int start) { return FunctionEntry(); }
- virtual void LogSymbol(int start, Vector<const char> symbol) {}
- virtual void LogError() { }
- // Return the current position in the function entry log.
- virtual int function_position() { return 0; }
- virtual int symbol_position() { return 0; }
- virtual int symbol_ids() { return 0; }
- virtual void PauseRecording() {}
- virtual void ResumeRecording() {}
- virtual Vector<unsigned> ExtractData() {
- return Vector<unsigned>();
- };
-};
-
-
-
class ConditionalLogPauseScope {
public:
ConditionalLogPauseScope(bool pause, ParserLog* log)
@@ -484,141 +464,65 @@
};
-// Record only functions.
-class PartialParserRecorder: public ParserLog {
- public:
- PartialParserRecorder();
- virtual FunctionEntry LogFunction(int start);
-
- virtual int function_position() { return function_store_.size(); }
-
- virtual void LogError() { }
-
- virtual void LogMessage(Scanner::Location loc,
- const char* message,
- Vector<const char*> args);
-
- virtual Vector<unsigned> ExtractData() {
- int function_size = function_store_.size();
- int total_size = ScriptDataImpl::kHeaderSize + function_size;
- Vector<unsigned> data = Vector<unsigned>::New(total_size);
- preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_size;
- preamble_[ScriptDataImpl::kSymbolCountOffset] = 0;
- memcpy(data.start(), preamble_, sizeof(preamble_));
- int symbol_start = ScriptDataImpl::kHeaderSize + function_size;
- if (function_size > 0) {
- function_store_.WriteTo(data.SubVector(ScriptDataImpl::kHeaderSize,
- symbol_start));
- }
- return data;
- }
-
- virtual void PauseRecording() {
- pause_count_++;
- is_recording_ = false;
- }
-
- virtual void ResumeRecording() {
- ASSERT(pause_count_ > 0);
- if (--pause_count_ == 0) is_recording_ = !has_error();
- }
-
- protected:
- bool has_error() {
- return static_cast<bool>(preamble_[ScriptDataImpl::kHasErrorOffset]);
- }
- bool is_recording() {
- return is_recording_;
- }
-
- void WriteString(Vector<const char> str);
-
- Collector<unsigned> function_store_;
- unsigned preamble_[ScriptDataImpl::kHeaderSize];
- bool is_recording_;
- int pause_count_;
-
-#ifdef DEBUG
- int prev_start;
-#endif
-};
-
-
-// Record both functions and symbols.
-class CompleteParserRecorder: public PartialParserRecorder {
- public:
- CompleteParserRecorder();
-
- virtual void LogSymbol(int start, Vector<const char> literal) {
- if (!is_recording_) return;
- int hash = vector_hash(literal);
- HashMap::Entry* entry = symbol_table_.Lookup(&literal, hash, true);
- int id = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
- if (id == 0) {
- // Put (symbol_id_ + 1) into entry and increment it.
- id = ++symbol_id_;
- entry->value = reinterpret_cast<void*>(id);
- Vector<Vector<const char> > symbol = symbol_entries_.AddBlock(1,
literal);
- entry->key = &symbol[0];
- }
- WriteNumber(id - 1);
- }
-
- virtual Vector<unsigned> ExtractData() {
- int function_size = function_store_.size();
- // Add terminator to symbols, then pad to unsigned size.
- int symbol_size = symbol_store_.size();
- int padding = sizeof(unsigned) - (symbol_size % sizeof(unsigned));
- symbol_store_.AddBlock(padding, ScriptDataImpl::kNumberTerminator);
- symbol_size += padding;
- int total_size = ScriptDataImpl::kHeaderSize + function_size
- + (symbol_size / sizeof(unsigned));
- Vector<unsigned> data = Vector<unsigned>::New(total_size);
- preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_size;
- preamble_[ScriptDataImpl::kSymbolCountOffset] = symbol_id_;
- memcpy(data.start(), preamble_, sizeof(preamble_));
- int symbol_start = ScriptDataImpl::kHeaderSize + function_size;
- if (function_size > 0) {
- function_store_.WriteTo(data.SubVector(ScriptDataImpl::kHeaderSize,
- symbol_start));
- }
- if (!has_error()) {
- symbol_store_.WriteTo(
- Vector<byte>::cast(data.SubVector(symbol_start, total_size)));
- }
- return data;
- }
-
- virtual int symbol_position() { return symbol_store_.size(); }
- virtual int symbol_ids() { return symbol_id_; }
- private:
- static int vector_hash(Vector<const char> string) {
- int hash = 0;
- for (int i = 0; i < string.length(); i++) {
- int c = string[i];
- hash += c;
- hash += (hash << 10);
- hash ^= (hash >> 6);
- }
- return hash;
- }
-
- static bool vector_compare(void* a, void* b) {
- Vector<const char>* string1 = reinterpret_cast<Vector<const char>*
(a);
- Vector<const char>* string2 = reinterpret_cast<Vector<const char>*
(b);
- int length = string1->length();
- if (string2->length() != length) return false;
- return memcmp(string1->start(), string2->start(), length) == 0;
- }
-
- // Write a non-negative number to the symbol store.
- void WriteNumber(int number);
-
- Collector<byte> symbol_store_;
- Collector<Vector<const char> > symbol_entries_;
- HashMap symbol_table_;
- int symbol_id_;
-};
+// Copies the header (preamble_) followed by the contents of function_store_
+// into a vector obtained from Vector<unsigned>::New and returns it. This
+// recorder stores no symbols, so the symbol count in the header is set to 0.
+Vector<unsigned> PartialParserRecorder::ExtractData() {
+  const int function_words = function_store_.size();
+  const int functions_begin = ScriptDataImpl::kHeaderSize;
+  const int functions_end = functions_begin + function_words;
+
+  Vector<unsigned> result = Vector<unsigned>::New(functions_end);
+
+  // Complete the header before it is copied to the front of the result.
+  preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_words;
+  preamble_[ScriptDataImpl::kSymbolCountOffset] = 0;
+  memcpy(result.start(), preamble_, sizeof(preamble_));
+
+  // The function data, if any, follows directly after the header.
+  if (function_words > 0) {
+    function_store_.WriteTo(result.SubVector(functions_begin, functions_end));
+  }
+  return result;
+}
+
+
+// Records one occurrence of a symbol. A symbol that has not been seen before
+// is assigned the next id; every occurrence then writes (id - 1) with
+// WriteNumber. Returns without doing anything while is_recording_ is false.
+// The start position is not used by this implementation.
+void CompleteParserRecorder::LogSymbol(int start,
+                                       Vector<const char> literal) {
+  if (!is_recording_) return;
+
+  int hash = vector_hash(literal);
+  HashMap::Entry* entry = symbol_table_.Lookup(&literal, hash, true);
+  // The entry value holds the id as a pointer-sized integer; zero means
+  // that no id has been stored in this entry yet.
+  int id = static_cast<int>(reinterpret_cast<intptr_t>(entry->value));
+  if (id == 0) {
+    // Put (symbol_id_ + 1) into entry and increment it.
+    id = ++symbol_id_;
+    entry->value = reinterpret_cast<void*>(id);
+    // The lookup was made with the address of the local parameter; point
+    // the key at the copy added to symbol_entries_ instead.
+    Vector<Vector<const char> > symbol =
+        symbol_entries_.AddBlock(1, literal);
+    entry->key = &symbol[0];
+  }
+  WriteNumber(id - 1);
+}
+
+
+// Copies the header (preamble_), the contents of function_store_ and, unless
+// an error has been recorded, the contents of symbol_store_ into a vector
+// obtained from Vector<unsigned>::New and returns it.
+Vector<unsigned> CompleteParserRecorder::ExtractData() {
+  const int function_words = function_store_.size();
+
+  // Terminate the symbol data and round its byte length up to a whole
+  // number of unsigned words. The padding is never zero, so at least one
+  // terminator byte is always appended.
+  const int unpadded_bytes = symbol_store_.size();
+  const int pad_bytes =
+      sizeof(unsigned) - (unpadded_bytes % sizeof(unsigned));
+  symbol_store_.AddBlock(pad_bytes, ScriptDataImpl::kNumberTerminator);
+  const int symbol_words = (unpadded_bytes + pad_bytes) / sizeof(unsigned);
+
+  // Layout of the result: header, function data, symbol data.
+  const int functions_begin = ScriptDataImpl::kHeaderSize;
+  const int symbols_begin = functions_begin + function_words;
+  const int symbols_end = symbols_begin + symbol_words;
+
+  Vector<unsigned> result = Vector<unsigned>::New(symbols_end);
+  preamble_[ScriptDataImpl::kFunctionsSizeOffset] = function_words;
+  preamble_[ScriptDataImpl::kSymbolCountOffset] = symbol_id_;
+  memcpy(result.start(), preamble_, sizeof(preamble_));
+
+  if (function_words > 0) {
+    function_store_.WriteTo(result.SubVector(functions_begin, symbols_begin));
+  }
+
+  // The symbol data is left out of the result once an error was recorded.
+  if (has_error()) return result;
+
+  symbol_store_.WriteTo(
+      Vector<byte>::cast(result.SubVector(symbols_begin, symbols_end)));
+  return result;
+}
+
+
FunctionEntry ScriptDataImpl::GetFunctionEntry(int start) {
@@ -691,7 +595,7 @@
preamble_[ScriptDataImpl::kSizeOffset] = 0;
ASSERT_EQ(6, ScriptDataImpl::kHeaderSize);
#ifdef DEBUG
- prev_start = -1;
+ prev_start_ = -1;
#endif
}
@@ -742,8 +646,8 @@
void PartialParserRecorder::LogMessage(Scanner::Location loc,
- const char* message,
- Vector<const char*> args) {
+ const char* message,
+ Vector<const char*> args) {
if (has_error()) return;
preamble_[ScriptDataImpl::kHasErrorOffset] = true;
function_store_.Reset();
@@ -798,18 +702,6 @@
unsigned* ScriptDataImpl::ReadAddress(int position) {
return &store_[ScriptDataImpl::kHeaderSize + position];
}
-
-
-FunctionEntry PartialParserRecorder::LogFunction(int start) {
-#ifdef DEBUG
- ASSERT(start > prev_start);
- prev_start = start;
-#endif
- if (!is_recording_) return FunctionEntry();
- FunctionEntry result(function_store_.AddBlock(FunctionEntry::kSize, 0));
- result.set_start_pos(start);
- return result;
-}
class AstBuildingParser : public Parser {
@@ -1034,26 +926,6 @@
pre_data_(pre_data),
fni_(NULL) {
}
-
-
-bool Parser::PreParseProgram(Handle<String> source,
- unibrow::CharacterStream* stream) {
- HistogramTimerScope timer(&Counters::pre_parse);
- AssertNoZoneAllocation assert_no_zone_allocation;
- AssertNoAllocation assert_no_allocation;
- NoHandleAllocation no_handle_allocation;
- scanner_.Initialize(source, stream, JAVASCRIPT);
- ASSERT(target_stack_ == NULL);
- mode_ = FLAG_lazy ? PARSE_LAZILY : PARSE_EAGERLY;
- if (allow_natives_syntax_ || extension_ != NULL) mode_ = PARSE_EAGERLY;
- DummyScope top_scope;
- LexicalScope scope(&this->top_scope_, &this->with_nesting_level_,
&top_scope);
- TemporaryScope temp_scope(&this->temp_scope_);
- ZoneListWrapper<Statement> processor;
- bool ok = true;
- ParseSourceElements(&processor, Token::EOS, &ok);
- return !scanner().stack_overflow();
-}
FunctionLiteral* Parser::ParseProgram(Handle<String> source,
@@ -1740,7 +1612,9 @@
while (!done) {
ParseIdentifier(CHECK_OK);
done = (peek() == Token::RPAREN);
- if (!done) Expect(Token::COMMA, CHECK_OK);
+ if (!done) {
+ Expect(Token::COMMA, CHECK_OK);
+ }
}
Expect(Token::RPAREN, CHECK_OK);
Expect(Token::SEMICOLON, CHECK_OK);
@@ -3720,7 +3594,6 @@
Expect(Token::RBRACE, CHECK_OK);
} else {
FunctionEntry entry;
- if (is_lazily_compiled) entry =
log()->LogFunction(function_block_pos);
{
ConditionalLogPauseScope pause_if(is_lazily_compiled, log());
ParseSourceElements(&body, Token::RBRACE, CHECK_OK);
@@ -3733,12 +3606,11 @@
Expect(Token::RBRACE, CHECK_OK);
end_pos = scanner_.location().end_pos;
- if (entry.is_valid()) {
- ASSERT(is_lazily_compiled);
+ if (is_pre_parsing_ && is_lazily_compiled) {
ASSERT(is_pre_parsing_);
- entry.set_end_pos(end_pos);
- entry.set_literal_count(materialized_literal_count);
- entry.set_property_count(expected_property_count);
+ log()->LogFunction(function_block_pos, end_pos,
+ materialized_literal_count,
+ expected_property_count);
}
}
@@ -5001,23 +4873,6 @@
bool ScriptDataImpl::HasError() {
return has_error();
}
-
-
-// Preparse, but only collect data that is immediately useful,
-// even if the preparser data is only used once.
-ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
- unibrow::CharacterStream*
stream,
- v8::Extension* extension) {
- Handle<Script> no_script;
- bool allow_natives_syntax =
- FLAG_allow_natives_syntax || Bootstrapper::IsActive();
- PartialPreParser parser(no_script, allow_natives_syntax, extension);
- if (!parser.PreParseProgram(source, stream)) return NULL;
- // Extract the accumulated data from the recorder as a single
- // contiguous vector that we are responsible for disposing.
- Vector<unsigned> store = parser.recorder()->ExtractData();
- return new ScriptDataImpl(store);
-}
void ScriptDataImpl::Initialize() {
@@ -5061,19 +4916,47 @@
*source = data;
return result;
}
+
+
+// Preparse, but only collect data that is immediately useful,
+// even if the preparser data is only used once.
+// Returns NULL, after calling Top::StackOverflow(), when PreParseProgram
+// reports failure; otherwise returns a new ScriptDataImpl built from the
+// data extracted from the recorder.
+ScriptDataImpl* ParserApi::PartialPreParse(Handle<String> source,
+                                           unibrow::CharacterStream* stream,
+                                           v8::Extension* extension) {
+  preparser::PreParser<Scanner, PartialParserRecorder> parser;
+  Scanner scanner;
+  scanner.Initialize(source, stream, JAVASCRIPT);
+  // Lazy handling is only requested when FLAG_lazy is set and no extension
+  // was supplied.
+  bool allow_lazy = FLAG_lazy && (extension == NULL);
+  PartialParserRecorder recorder;
+  if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
+    Top::StackOverflow();
+    return NULL;
+  }
+
+  // Extract the accumulated data from the recorder as a single
+  // contiguous vector that we are responsible for disposing.
+  Vector<unsigned> store = recorder.ExtractData();
+  return new ScriptDataImpl(store);
+}
ScriptDataImpl* ParserApi::PreParse(Handle<String> source,
unibrow::CharacterStream* stream,
v8::Extension* extension) {
Handle<Script> no_script;
- bool allow_natives_syntax =
- FLAG_allow_natives_syntax || Bootstrapper::IsActive();
- CompletePreParser parser(no_script, allow_natives_syntax, extension);
- if (!parser.PreParseProgram(source, stream)) return NULL;
+ preparser::PreParser<Scanner, CompleteParserRecorder> parser;
+ Scanner scanner;
+ scanner.Initialize(source, stream, JAVASCRIPT);
+ bool allow_lazy = FLAG_lazy && (extension == NULL);
+ CompleteParserRecorder recorder;
+ if (!parser.PreParseProgram(&scanner, &recorder, allow_lazy)) {
+ Top::StackOverflow();
+ return NULL;
+ }
// Extract the accumulated data from the recorder as a single
// contiguous vector that we are responsible for disposing.
- Vector<unsigned> store = parser.recorder()->ExtractData();
+ Vector<unsigned> store = recorder.ExtractData();
return new ScriptDataImpl(store);
}
=======================================
--- /branches/bleeding_edge/src/parser.h Wed Oct 27 05:33:48 2010
+++ /branches/bleeding_edge/src/parser.h Tue Nov 2 00:21:37 2010
@@ -177,6 +177,152 @@
};
+// Base class for recorders of preparse data. Every operation has a
+// do-nothing default (queries return 0 or an empty vector), so a subclass
+// overrides only the operations it actually records.
+class ParserLog BASE_EMBEDDED {
+ public:
+  virtual ~ParserLog() { }
+
+  // Records the occurrence of a function.
+  virtual void LogFunction(int start,
+                           int end,
+                           int literals,
+                           int properties) {}
+  // Records the occurrence of a symbol in the source. The vector holds the
+  // UTF-8 encoded symbol content.
+  virtual void LogSymbol(int start, Vector<const char> symbol) {}
+  // Records the occurrence of a symbol in the source. The symbol pointer
+  // points to the UTF-8 encoded symbol content.
+  virtual void LogSymbol(int start, const char* symbol, int length) {}
+  // Return the current position in the function entry log.
+  virtual int function_position() { return 0; }
+  // Return the current position in the symbol entry log.
+  // Notice: Functions and symbols are currently logged separately.
+  virtual int symbol_position() { return 0; }
+  // Return the number of distinct symbols logged.
+  virtual int symbol_ids() { return 0; }
+  // Pauses recording. The Log-functions above will do nothing during
+  // pausing.
+  // Pauses can be nested.
+  virtual void PauseRecording() {}
+  // Ends a recording pause.
+  virtual void ResumeRecording() {}
+  // Extracts a representation of the logged data that can be used by
+  // ScriptData.
+  virtual Vector<unsigned> ExtractData() {
+    return Vector<unsigned>();
+  }
+};
+
+
+// Record only functions.
+class PartialParserRecorder: public ParserLog {
+ public:
+  PartialParserRecorder();
+
+  // Appends start, end, literals and properties, in that order, to the
+  // function store.
+  // NOTE(review): unlike CompleteParserRecorder::LogSymbol, this does not
+  // test is_recording_, so it also appends while recording is paused.
+  // Presumably callers only log a function outside of a pause -- confirm.
+  virtual void LogFunction(int start,
+                           int end,
+                           int literals,
+                           int properties) {
+    function_store_.Add(start);
+    function_store_.Add(end);
+    function_store_.Add(literals);
+    function_store_.Add(properties);
+  }
+
+  // Logs an error message and marks the log as containing an error.
+  // Further logging will be ignored, and ExtractData will return a vector
+  // representing the error only.
+  // This overload wraps the positions in a Scanner::Location and the
+  // optional argument (which may be NULL) in a vector of at most one
+  // element, then forwards to the overload taking a location.
+  void LogMessage(int start,
+                  int end,
+                  const char* message,
+                  const char* argument_opt) {
+    Scanner::Location location(start, end);
+    Vector<const char*> arguments;
+    if (argument_opt != NULL) {
+      // The vector refers to the parameter itself, which is alive for the
+      // duration of the forwarded call below.
+      arguments = Vector<const char*>(&argument_opt, 1);
+    }
+    this->LogMessage(location, message, arguments);
+  }
+
+  virtual int function_position() { return function_store_.size(); }
+
+  virtual void LogMessage(Scanner::Location loc,
+                          const char* message,
+                          Vector<const char*> args);
+
+  virtual Vector<unsigned> ExtractData();
+
+  // Starts a (possibly nested) pause and switches recording off.
+  virtual void PauseRecording() {
+    pause_count_++;
+    is_recording_ = false;
+  }
+
+  // Ends one pause. Recording is switched back on only when the outermost
+  // pause ends and no error has been recorded.
+  virtual void ResumeRecording() {
+    ASSERT(pause_count_ > 0);
+    if (--pause_count_ == 0) is_recording_ = !has_error();
+  }
+
+ protected:
+  // True when the error slot of the header holds a non-zero value.
+  bool has_error() {
+    return static_cast<bool>(preamble_[ScriptDataImpl::kHasErrorOffset]);
+  }
+
+  bool is_recording() {
+    return is_recording_;
+  }
+
+  void WriteString(Vector<const char> str);
+
+  // Collected function data; ExtractData copies it after the header.
+  Collector<unsigned> function_store_;
+  // Header words that ExtractData copies to the front of its result.
+  unsigned preamble_[ScriptDataImpl::kHeaderSize];
+  bool is_recording_;
+  // Number of pauses that have been started but not yet ended.
+  int pause_count_;
+
+#ifdef DEBUG
+  int prev_start_;
+#endif
+};
+
+
+// Record both functions and symbols.
+class CompleteParserRecorder: public PartialParserRecorder {
+ public:
+  CompleteParserRecorder();
+
+  virtual void LogSymbol(int start, Vector<const char> literal);
+
+  // Wraps the (pointer, length) pair in a vector and forwards to the
+  // overload taking a vector.
+  virtual void LogSymbol(int start, const char* symbol, int length) {
+    LogSymbol(start, Vector<const char>(symbol, length));
+  }
+
+  virtual Vector<unsigned> ExtractData();
+
+  virtual int symbol_position() { return symbol_store_.size(); }
+  virtual int symbol_ids() { return symbol_id_; }
+
+ private:
+  // Computes a hash over all characters of the string by repeatedly adding
+  // a character and mixing with shifts. An empty string hashes to 0.
+  static int vector_hash(Vector<const char> string) {
+    int hash = 0;
+    for (int i = 0; i < string.length(); i++) {
+      int c = string[i];
+      hash += c;
+      hash += (hash << 10);
+      hash ^= (hash >> 6);
+    }
+    return hash;
+  }
+
+  // Compares two Vector<const char> objects passed as untyped pointers.
+  // They are equal when they have the same length and the same bytes.
+  static bool vector_compare(void* a, void* b) {
+    Vector<const char>* string1 =
+        reinterpret_cast<Vector<const char>* >(a);
+    Vector<const char>* string2 =
+        reinterpret_cast<Vector<const char>* >(b);
+    int length = string1->length();
+    if (string2->length() != length) return false;
+    return memcmp(string1->start(), string2->start(), length) == 0;
+  }
+
+  // Write a non-negative number to the symbol store.
+  void WriteNumber(int number);
+
+  // Byte data that ExtractData appends after the function data.
+  Collector<byte> symbol_store_;
+  // Copies of the symbols seen so far; the symbol table keys point here.
+  Collector<Vector<const char> > symbol_entries_;
+  // Maps a symbol to the id assigned to it by LogSymbol.
+  HashMap symbol_table_;
+  // The most recently assigned symbol id; reported by symbol_ids().
+  int symbol_id_;
+};
+
+
+
class ParserApi {
public:
// Parses the source code represented by the compilation info and sets
its
@@ -433,10 +579,6 @@
v8::Extension* extension, ParserMode is_pre_parsing,
ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data);
virtual ~Parser() { }
-
- // Pre-parse the program from the character stream; returns true on
- // success, false if a stack-overflow happened during parsing.
- bool PreParseProgram(Handle<String> source, unibrow::CharacterStream*
stream);
void ReportMessage(const char* message, Vector<const char*> args);
virtual void ReportMessageAt(Scanner::Location loc,
=======================================
--- /branches/bleeding_edge/test/cctest/test-parsing.cc Wed Sep 22 06:24:14
2010
+++ /branches/bleeding_edge/test/cctest/test-parsing.cc Tue Nov 2 00:21:37
2010
@@ -26,6 +26,7 @@
// OF THIS SOFTWARE, EVEN IF ADVISED OF THE POSSIBILITY OF SUCH DAMAGE.
#include <stdlib.h>
+#include <stdio.h>
#include "v8.h"
@@ -34,7 +35,8 @@
#include "parser.h"
#include "utils.h"
#include "execution.h"
-
+#include "scanner.h"
+#include "preparser.h"
#include "cctest.h"
namespace i = ::v8::internal;
@@ -239,3 +241,31 @@
i::Vector<const char*> args = pre_impl->BuildArgs();
CHECK_GT(strlen(message), 0);
}
+
+
+// Runs the stand-alone preparser over a few small programs and checks, for
+// each one, that PreParseProgram returns true and that the data extracted
+// from the recorder has no error flagged.
+TEST(StandAlonePreParser) {
+  // Set the stack limit 128KB below the address of a local variable.
+  int marker;
+  i::StackGuard::SetStackLimit(
+      reinterpret_cast<uintptr_t>(&marker) - 128 * 1024);
+
+  const char* programs[] = {
+      "{label: 42}",
+      "var x = 42;",
+      "function foo(x, y) { return x + y; }",
+      "native function foo(); return %ArgleBargle(glop);",
+      NULL
+  };
+
+  // The array is NULL-terminated. The counter is not called 'i' so that it
+  // cannot be confused with the namespace alias i used as a qualifier below.
+  for (int index = 0; programs[index]; index++) {
+    const char* program = programs[index];
+    unibrow::Utf8InputBuffer<256> stream(program, strlen(program));
+    i::CompleteParserRecorder log;
+    i::Scanner scanner;
+    scanner.Initialize(i::Handle<i::String>::null(), &stream, i::JAVASCRIPT);
+    i::preparser::PreParser<i::Scanner, i::CompleteParserRecorder> preparser;
+    bool result = preparser.PreParseProgram(&scanner, &log, true);
+    CHECK(result);
+    i::ScriptDataImpl data(log.ExtractData());
+    CHECK(!data.has_error());
+  }
+}
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev