Revision: 3752
Author: [email protected]
Date: Mon Feb 1 02:31:55 2010
Log: Added validating JSON parser mode to parser.
Review URL: http://codereview.chromium.org/549207
http://code.google.com/p/v8/source/detail?r=3752
Modified:
/branches/bleeding_edge/src/ast.cc
/branches/bleeding_edge/src/ast.h
/branches/bleeding_edge/src/compiler.cc
/branches/bleeding_edge/src/json-delay.js
/branches/bleeding_edge/src/parser.cc
/branches/bleeding_edge/src/parser.h
/branches/bleeding_edge/src/scanner.cc
/branches/bleeding_edge/src/scanner.h
/branches/bleeding_edge/test/mjsunit/debug-compile-event.js
/branches/bleeding_edge/test/mjsunit/json.js
/branches/bleeding_edge/test/mjsunit/mirror-script.js
/branches/bleeding_edge/test/mjsunit/mjsunit.js
=======================================
--- /branches/bleeding_edge/src/ast.cc Thu Jan 7 11:01:23 2010
+++ /branches/bleeding_edge/src/ast.cc Mon Feb 1 02:31:55 2010
@@ -144,27 +144,6 @@
(kind_ == MATERIALIZED_LITERAL &&
CompileTimeValue::IsCompileTimeValue(value_));
}
-
-
-bool ObjectLiteral::IsValidJSON() {
- int length = properties()->length();
- for (int i = 0; i < length; i++) {
- Property* prop = properties()->at(i);
- if (!prop->value()->IsValidJSON())
- return false;
- }
- return true;
-}
-
-
-bool ArrayLiteral::IsValidJSON() {
- int length = values()->length();
- for (int i = 0; i < length; i++) {
- if (!values()->at(i)->IsValidJSON())
- return false;
- }
- return true;
-}
void TargetCollector::AddTarget(BreakTarget* target) {
=======================================
--- /branches/bleeding_edge/src/ast.h Fri Jan 29 03:55:40 2010
+++ /branches/bleeding_edge/src/ast.h Mon Feb 1 02:31:55 2010
@@ -186,7 +186,6 @@
virtual Expression* AsExpression() { return this; }
- virtual bool IsValidJSON() { return false; }
virtual bool IsValidLeftHandSide() { return false; }
// Symbols that cannot be parsed as array indices are considered property
@@ -712,8 +711,6 @@
bool IsIdenticalTo(const Literal* other) const {
return handle_.is_identical_to(other->handle_);
}
-
- virtual bool IsValidJSON() { return true; }
virtual bool IsPropertyName() {
if (handle_->IsSymbol()) {
@@ -750,8 +747,6 @@
// A materialized literal is simple if the values consist of only
// constants and simple object and array literals.
bool is_simple() const { return is_simple_; }
-
- virtual bool IsValidJSON() { return true; }
int depth() const { return depth_; }
@@ -806,7 +801,6 @@
virtual ObjectLiteral* AsObjectLiteral() { return this; }
virtual void Accept(AstVisitor* v);
- virtual bool IsValidJSON();
Handle<FixedArray> constant_properties() const {
return constant_properties_;
@@ -854,7 +848,6 @@
virtual void Accept(AstVisitor* v);
virtual ArrayLiteral* AsArrayLiteral() { return this; }
- virtual bool IsValidJSON();
Handle<FixedArray> constant_elements() const { return
constant_elements_; }
ZoneList<Expression*>* values() const { return values_; }
=======================================
--- /branches/bleeding_edge/src/compiler.cc Fri Jan 29 07:29:33 2010
+++ /branches/bleeding_edge/src/compiler.cc Mon Feb 1 02:31:55 2010
@@ -119,17 +119,6 @@
return CodeGenerator::MakeCode(literal, script, is_eval, info);
}
-
-
-static bool IsValidJSON(FunctionLiteral* lit) {
- if (lit->body()->length() != 1)
- return false;
- Statement* stmt = lit->body()->at(0);
- if (stmt->AsExpressionStatement() == NULL)
- return false;
- Expression* expr = stmt->AsExpressionStatement()->expression();
- return expr->IsValidJSON();
-}
static Handle<JSFunction> MakeFunction(bool is_global,
@@ -146,8 +135,8 @@
ASSERT(!i::Top::global_context().is_null());
script->set_context_data((*i::Top::global_context())->data());
-#ifdef ENABLE_DEBUGGER_SUPPORT
bool is_json = (validate == Compiler::VALIDATE_JSON);
+#ifdef ENABLE_DEBUGGER_SUPPORT
if (is_eval || is_json) {
script->set_compilation_type(
is_json ? Smi::FromInt(Script::COMPILATION_TYPE_JSON) :
@@ -172,26 +161,14 @@
ASSERT(is_eval || is_global);
// Build AST.
- FunctionLiteral* lit = MakeAST(is_global, script, extension, pre_data);
+ FunctionLiteral* lit =
+ MakeAST(is_global, script, extension, pre_data, is_json);
// Check for parse errors.
if (lit == NULL) {
ASSERT(Top::has_pending_exception());
return Handle<JSFunction>::null();
}
-
- // When parsing JSON we do an ordinary parse and then afterwards
- // check the AST to ensure it was well-formed. If not we give a
- // syntax error.
- if (validate == Compiler::VALIDATE_JSON && !IsValidJSON(lit)) {
- HandleScope scope;
- Handle<JSArray> args = Factory::NewJSArray(1);
- Handle<Object> source(script->source());
- SetElement(args, 0, source);
- Handle<Object> result = Factory::NewSyntaxError("invalid_json", args);
- Top::Throw(*result, NULL);
- return Handle<JSFunction>::null();
- }
// Measure how long it takes to do the compilation; only take the
// rest of the function into account to avoid overlap with the
=======================================
--- /branches/bleeding_edge/src/json-delay.js Wed Sep 9 03:49:40 2009
+++ /branches/bleeding_edge/src/json-delay.js Mon Feb 1 02:31:55 2010
@@ -29,7 +29,7 @@
function ParseJSONUnfiltered(text) {
var s = $String(text);
- var f = %CompileString("(" + text + ")", true);
+ var f = %CompileString(text, true);
return f();
}
=======================================
--- /branches/bleeding_edge/src/parser.cc Tue Jan 19 02:32:20 2010
+++ /branches/bleeding_edge/src/parser.cc Mon Feb 1 02:31:55 2010
@@ -91,7 +91,7 @@
class Parser {
public:
Parser(Handle<Script> script, bool allow_natives_syntax,
- v8::Extension* extension, bool is_pre_parsing,
+ v8::Extension* extension, ParserMode is_pre_parsing,
ParserFactory* factory, ParserLog* log, ScriptDataImpl* pre_data);
virtual ~Parser() { }
@@ -112,6 +112,8 @@
FunctionLiteral* ParseLazy(Handle<String> source,
Handle<String> name,
int start_position, bool is_expression);
+ FunctionLiteral* ParseJson(Handle<String> source,
+ unibrow::CharacterStream* stream);
// The minimum number of contiguous assignment that will
// be treated as an initialization block. Benchmarks show that
@@ -202,7 +204,21 @@
Expression* ParseObjectLiteral(bool* ok);
Expression* ParseRegExpLiteral(bool seen_equal, bool* ok);
- // Decide if a property should be the object boilerplate.
+ // Populate the constant properties fixed array for a materialized object
+ // literal.
+ void BuildObjectLiteralConstantProperties(
+ ZoneList<ObjectLiteral::Property*>* properties,
+ Handle<FixedArray> constants,
+ bool* is_simple,
+ int* depth);
+
+ // Populate the literals fixed array for a materialized array literal.
+ void BuildArrayLiteralBoilerplateLiterals(ZoneList<Expression*>*
properties,
+ Handle<FixedArray> constants,
+ bool* is_simple,
+ int* depth);
+
+ // Decide if a property should be in the object boilerplate.
bool IsBoilerplateProperty(ObjectLiteral::Property* property);
// If the expression is a literal, return the literal value;
// if the expression is a materialized literal and is simple return a
@@ -231,6 +247,7 @@
INLINE(Token::Value Next()) { return scanner_.Next(); }
INLINE(void Consume(Token::Value token));
void Expect(Token::Value token, bool* ok);
+ bool Check(Token::Value token);
void ExpectSemicolon(bool* ok);
// Get odd-ball literals.
@@ -277,6 +294,29 @@
Handle<String> type,
Vector< Handle<Object> > arguments);
+ // JSON is a subset of JavaScript, as specified in, e.g., the ECMAScript 5
+ // specification section 15.12.1 (and appendix A.8).
+ // The grammar is given in section 15.12.1.2 (and appendix A.8.2).
+
+ // Parse JSON input as a single JSON value.
+ Expression* ParseJson(bool* ok);
+
+ // Parse a single JSON value from input (grammar production JSONValue).
+ // A JSON value is either a (double-quoted) string literal, a number
literal,
+ // one of "true", "false", or "null", or an object or array literal.
+ Expression* ParseJsonValue(bool* ok);
+ // Parse a JSON object literal (grammar production JSONObject).
+ // An object literal is a squiggly-braced and comma separated sequence
+ // (possibly empty) of key/value pairs, where the key is a JSON string
+ // literal, the value is a JSON value, and the two are separated by a colon.
+ // A JavaScript object also allows numbers and identifiers as keys.
+ Expression* ParseJsonObject(bool* ok);
+ // Parses a JSON array literal (grammar production JSONArray). An array
+ // literal is a square-bracketed and comma separated sequence (possibly
empty)
+ // of JSON values.
+ // A JavaScript array allows leaving out values from the sequence.
+ Expression* ParseJsonArray(bool* ok);
+
friend class Target;
friend class TargetScope;
friend class LexicalScope;
@@ -983,7 +1023,7 @@
public:
AstBuildingParser(Handle<Script> script, bool allow_natives_syntax,
v8::Extension* extension, ScriptDataImpl* pre_data)
- : Parser(script, allow_natives_syntax, extension, false,
+ : Parser(script, allow_natives_syntax, extension, PARSE,
factory(), log(), pre_data) { }
virtual void ReportMessageAt(Scanner::Location loc, const char* message,
Vector<const char*> args);
@@ -1002,9 +1042,9 @@
public:
PreParser(Handle<Script> script, bool allow_natives_syntax,
v8::Extension* extension)
- : Parser(script, allow_natives_syntax, extension, true,
- factory(), recorder(), NULL)
- , factory_(true) { }
+ : Parser(script, allow_natives_syntax, extension, PREPARSE,
+ factory(), recorder(), NULL),
+ factory_(true) { }
virtual void ReportMessageAt(Scanner::Location loc, const char* message,
Vector<const char*> args);
virtual VariableProxy* Declare(Handle<String> name, Variable::Mode mode,
@@ -1147,7 +1187,7 @@
Parser::Parser(Handle<Script> script,
bool allow_natives_syntax,
v8::Extension* extension,
- bool is_pre_parsing,
+ ParserMode is_pre_parsing,
ParserFactory* factory,
ParserLog* log,
ScriptDataImpl* pre_data)
@@ -1161,7 +1201,7 @@
extension_(extension),
factory_(factory),
log_(log),
- is_pre_parsing_(is_pre_parsing),
+ is_pre_parsing_(is_pre_parsing == PREPARSE),
pre_data_(pre_data) {
}
@@ -1172,7 +1212,7 @@
AssertNoZoneAllocation assert_no_zone_allocation;
AssertNoAllocation assert_no_allocation;
NoHandleAllocation no_handle_allocation;
- scanner_.Init(source, stream, 0);
+ scanner_.Init(source, stream, 0, JAVASCRIPT);
ASSERT(target_stack_ == NULL);
mode_ = PARSE_EAGERLY;
DummyScope top_scope;
@@ -1195,7 +1235,7 @@
// Initialize parser state.
source->TryFlattenIfNotFlat();
- scanner_.Init(source, stream, 0);
+ scanner_.Init(source, stream, 0, JAVASCRIPT);
ASSERT(target_stack_ == NULL);
// Compute the parsing mode.
@@ -1254,7 +1294,7 @@
SafeStringInputBuffer buffer(source.location());
// Initialize parser state.
- scanner_.Init(source, &buffer, start_position);
+ scanner_.Init(source, &buffer, start_position, JAVASCRIPT);
ASSERT(target_stack_ == NULL);
mode_ = PARSE_EAGERLY;
@@ -1290,6 +1330,55 @@
return result;
}
+FunctionLiteral* Parser::ParseJson(Handle<String> source,
+ unibrow::CharacterStream* stream) {
+ CompilationZoneScope zone_scope(DONT_DELETE_ON_EXIT);
+
+ HistogramTimerScope timer(&Counters::parse);
+ Counters::total_parse_size.Increment(source->length());
+
+ // Initialize parser state.
+ source->TryFlattenIfNotFlat();
+ scanner_.Init(source, stream, 0, JSON);
+ ASSERT(target_stack_ == NULL);
+
+ FunctionLiteral* result = NULL;
+ Handle<String> no_name = factory()->EmptySymbol();
+
+ {
+ Scope* scope = factory()->NewScope(top_scope_, Scope::GLOBAL_SCOPE,
false);
+ LexicalScope lexical_scope(this, scope);
+ TemporaryScope temp_scope(this);
+ bool ok = true;
+ Expression* expression = ParseJson(&ok);
+ if (ok) {
+ ZoneListWrapper<Statement> statement =
factory()->NewList<Statement>(1);
+ statement.Add(new ExpressionStatement(expression));
+ result = NEW(FunctionLiteral(
+ no_name,
+ top_scope_,
+ statement.elements(),
+ temp_scope.materialized_literal_count(),
+ temp_scope.expected_property_count(),
+ temp_scope.only_simple_this_property_assignments(),
+ temp_scope.this_property_assignments(),
+ 0,
+ 0,
+ source->length(),
+ false));
+ } else if (scanner().stack_overflow()) {
+ Top::StackOverflow();
+ }
+ }
+
+ // Make sure the target stack is empty.
+ ASSERT(target_stack_ == NULL);
+
+ // If there was a syntax error we have to get rid of the AST
+ // and it is not safe to do so before the scope has been deleted.
+ if (result == NULL) zone_scope.DeleteOnExit();
+ return result;
+}
void Parser::ReportMessage(const char* type, Vector<const char*> args) {
Scanner::Location source_location = scanner_.location();
@@ -3122,7 +3211,7 @@
void Parser::ReportUnexpectedToken(Token::Value token) {
// We don't report stack overflows here, to avoid increasing the
// stack depth even further. Instead we report it after parsing is
- // over, in ParseProgram.
+ // over, in ParseProgram/ParseJson.
if (token == Token::ILLEGAL && scanner().stack_overflow())
return;
// Four of the tokens are treated specially
@@ -3260,6 +3349,33 @@
return result;
}
+
+
+void Parser::BuildArrayLiteralBoilerplateLiterals(ZoneList<Expression*>*
values,
+ Handle<FixedArray>
literals,
+ bool* is_simple,
+ int* depth) {
+ // Fill in the literals.
+ // Accumulate output values in local variables.
+ bool is_simple_acc = true;
+ int depth_acc = 1;
+ for (int i = 0; i < values->length(); i++) {
+ MaterializedLiteral* m_literal =
values->at(i)->AsMaterializedLiteral();
+ if (m_literal != NULL && m_literal->depth() >= depth_acc) {
+ depth_acc = m_literal->depth() + 1;
+ }
+ Handle<Object> boilerplate_value = GetBoilerplateValue(values->at(i));
+ if (boilerplate_value->IsUndefined()) {
+ literals->set_the_hole(i);
+ is_simple_acc = false;
+ } else {
+ literals->set(i, *boilerplate_value);
+ }
+ }
+
+ *is_simple = is_simple_acc;
+ *depth = depth_acc;
+}
Expression* Parser::ParseArrayLiteral(bool* ok) {
@@ -3362,6 +3478,43 @@
}
return Factory::undefined_value();
}
+
+
+void Parser::BuildObjectLiteralConstantProperties(
+ ZoneList<ObjectLiteral::Property*>* properties,
+ Handle<FixedArray> constant_properties,
+ bool* is_simple,
+ int* depth) {
+ int position = 0;
+ // Accumulate the value in local variables and store it at the end.
+ bool is_simple_acc = true;
+ int depth_acc = 1;
+ for (int i = 0; i < properties->length(); i++) {
+ ObjectLiteral::Property* property = properties->at(i);
+ if (!IsBoilerplateProperty(property)) {
+ is_simple_acc = false;
+ continue;
+ }
+ MaterializedLiteral* m_literal =
property->value()->AsMaterializedLiteral();
+ if (m_literal != NULL && m_literal->depth() >= depth_acc) {
+ depth_acc = m_literal->depth() + 1;
+ }
+
+ // Add CONSTANT and COMPUTED properties to boilerplate. Use undefined
+ // value for COMPUTED properties, the real value is filled in at
+ // runtime. The enumeration order is maintained.
+ Handle<Object> key = property->key()->handle();
+ Handle<Object> value = GetBoilerplateValue(property->value());
+ is_simple_acc = is_simple_acc && !value->IsUndefined();
+
+ // Add name, value pair to the fixed array.
+ constant_properties->set(position++, *key);
+ constant_properties->set(position++, *value);
+ }
+
+ *is_simple = is_simple_acc;
+ *depth = depth_acc;
+}
Expression* Parser::ParseObjectLiteral(bool* ok) {
@@ -3454,32 +3607,13 @@
Handle<FixedArray> constant_properties =
Factory::NewFixedArray(number_of_boilerplate_properties * 2,
TENURED);
- int position = 0;
+
bool is_simple = true;
int depth = 1;
- for (int i = 0; i < properties.length(); i++) {
- ObjectLiteral::Property* property = properties.at(i);
- if (!IsBoilerplateProperty(property)) {
- is_simple = false;
- continue;
- }
- MaterializedLiteral* m_literal =
property->value()->AsMaterializedLiteral();
- if (m_literal != NULL && m_literal->depth() + 1 > depth) {
- depth = m_literal->depth() + 1;
- }
-
- // Add CONSTANT and COMPUTED properties to boilerplate. Use undefined
- // value for COMPUTED properties, the real value is filled in at
- // runtime. The enumeration order is maintained.
- Handle<Object> key = property->key()->handle();
- Handle<Object> value = GetBoilerplateValue(property->value());
- is_simple = is_simple && !value->IsUndefined();
-
- // Add name, value pair to the fixed array.
- constant_properties->set(position++, *key);
- constant_properties->set(position++, *value);
- }
-
+ BuildObjectLiteralConstantProperties(properties.elements(),
+ constant_properties,
+ &is_simple,
+ &depth);
return new ObjectLiteral(constant_properties,
properties.elements(),
literal_index,
@@ -3718,6 +3852,16 @@
ReportUnexpectedToken(next);
*ok = false;
}
+
+
+bool Parser::Check(Token::Value token) {
+ Token::Value next = peek();
+ if (next == token) {
+ Consume(next);
+ return true;
+ }
+ return false;
+}
void Parser::ExpectSemicolon(bool* ok) {
@@ -3886,6 +4030,145 @@
scanner().location().beg_pos);
}
+//
----------------------------------------------------------------------------
+// JSON
+
+Expression* Parser::ParseJson(bool* ok) {
+ Expression* result = ParseJsonValue(CHECK_OK);
+ Expect(Token::EOS, CHECK_OK);
+ return result;
+}
+
+
+// Parse any JSON value.
+Expression* Parser::ParseJsonValue(bool* ok) {
+ Token::Value token = peek();
+ switch (token) {
+ case Token::STRING: {
+ Consume(Token::STRING);
+ int literal_length = scanner_.literal_length();
+ const char* literal_string = scanner_.literal_string();
+ if (literal_length == 0) {
+ return NEW(Literal(Factory::empty_string()));
+ }
+ Vector<const char> literal(literal_string, literal_length);
+ return NEW(Literal(Factory::NewStringFromUtf8(literal, TENURED)));
+ }
+ case Token::NUMBER: {
+ Consume(Token::NUMBER);
+ ASSERT(scanner_.literal_length() > 0);
+ double value = StringToDouble(scanner_.literal_string(),
+ NO_FLAGS, // No hex, octal or trailing junk accepted.
+ OS::nan_value());
+ return NewNumberLiteral(value);
+ }
+ case Token::FALSE_LITERAL:
+ Consume(Token::FALSE_LITERAL);
+ return NEW(Literal(Factory::false_value()));
+ case Token::TRUE_LITERAL:
+ Consume(Token::TRUE_LITERAL);
+ return NEW(Literal(Factory::true_value()));
+ case Token::NULL_LITERAL:
+ Consume(Token::NULL_LITERAL);
+ return NEW(Literal(Factory::null_value()));
+ case Token::LBRACE: {
+ Expression* result = ParseJsonObject(CHECK_OK);
+ return result;
+ }
+ case Token::LBRACK: {
+ Expression* result = ParseJsonArray(CHECK_OK);
+ return result;
+ }
+ default:
+ *ok = false;
+ ReportUnexpectedToken(token);
+ return NULL;
+ }
+}
+
+
+// Parse a JSON object. Scanner must be right after '{' token.
+Expression* Parser::ParseJsonObject(bool* ok) {
+ Consume(Token::LBRACE);
+ ZoneListWrapper<ObjectLiteral::Property> properties =
+ factory()->NewList<ObjectLiteral::Property>(4);
+ int boilerplate_properties = 0;
+ if (peek() != Token::RBRACE) {
+ do {
+ Expect(Token::STRING, CHECK_OK);
+ Handle<String> key =
factory()->LookupSymbol(scanner_.literal_string(),
+
scanner_.literal_length());
+ Expect(Token::COLON, CHECK_OK);
+ Expression* value = ParseJsonValue(CHECK_OK);
+ Literal* key_literal;
+ uint32_t index;
+ if (key->AsArrayIndex(&index)) {
+ key_literal = NewNumberLiteral(index);
+ } else {
+ key_literal = NEW(Literal(key));
+ }
+ ObjectLiteral::Property* property =
+ NEW(ObjectLiteral::Property(key_literal, value));
+ properties.Add(property);
+
+ if (IsBoilerplateProperty(property)) {
+ boilerplate_properties++;
+ }
+ } while (Check(Token::COMMA));
+ }
+ Expect(Token::RBRACE, CHECK_OK);
+
+ int literal_index = temp_scope_->NextMaterializedLiteralIndex();
+ if (is_pre_parsing_) return NULL;
+
+ Handle<FixedArray> constant_properties =
+ Factory::NewFixedArray(boilerplate_properties * 2, TENURED);
+ bool is_simple = true;
+ int depth = 1;
+ BuildObjectLiteralConstantProperties(properties.elements(),
+ constant_properties,
+ &is_simple,
+ &depth);
+ return new ObjectLiteral(constant_properties,
+ properties.elements(),
+ literal_index,
+ is_simple,
+ depth);
+}
+
+
+// Parse a JSON array. Scanner must be right after '[' token.
+Expression* Parser::ParseJsonArray(bool* ok) {
+ Consume(Token::LBRACK);
+
+ ZoneListWrapper<Expression> values = factory()->NewList<Expression>(4);
+ if (peek() != Token::RBRACK) {
+ do {
+ Expression* exp = ParseJsonValue(CHECK_OK);
+ values.Add(exp);
+ } while (Check(Token::COMMA));
+ }
+ Expect(Token::RBRACK, CHECK_OK);
+
+ // Update the scope information before the pre-parsing bailout.
+ int literal_index = temp_scope_->NextMaterializedLiteralIndex();
+
+ if (is_pre_parsing_) return NULL;
+
+ // Allocate a fixed array with all the literals.
+ Handle<FixedArray> literals =
+ Factory::NewFixedArray(values.length(), TENURED);
+
+ bool is_simple;
+ int depth;
+ BuildArrayLiteralBoilerplateLiterals(values.elements(),
+ literals,
+ &is_simple,
+ &depth);
+ return NEW(ArrayLiteral(literals, values.elements(),
+ literal_index, is_simple, depth));
+}
+
//
----------------------------------------------------------------------------
// Regular expressions
@@ -4761,7 +5044,8 @@
FunctionLiteral* MakeAST(bool compile_in_global_context,
Handle<Script> script,
v8::Extension* extension,
- ScriptDataImpl* pre_data) {
+ ScriptDataImpl* pre_data,
+ bool is_json) {
bool allow_natives_syntax =
always_allow_natives_syntax ||
FLAG_allow_natives_syntax ||
@@ -4773,15 +5057,21 @@
Vector<const char*> args = pre_data->BuildArgs();
parser.ReportMessageAt(loc, message, args);
DeleteArray(message);
- for (int i = 0; i < args.length(); i++)
+ for (int i = 0; i < args.length(); i++) {
DeleteArray(args[i]);
+ }
DeleteArray(args.start());
return NULL;
}
Handle<String> source = Handle<String>(String::cast(script->source()));
SafeStringInputBuffer input(source.location());
- FunctionLiteral* result = parser.ParseProgram(source,
- &input, compile_in_global_context);
+ FunctionLiteral* result;
+ if (is_json) {
+ ASSERT(compile_in_global_context);
+ result = parser.ParseJson(source, &input);
+ } else {
+ result = parser.ParseProgram(source, &input,
compile_in_global_context);
+ }
return result;
}
=======================================
--- /branches/bleeding_edge/src/parser.h Mon Jan 11 04:13:24 2010
+++ /branches/bleeding_edge/src/parser.h Mon Feb 1 02:31:55 2010
@@ -133,7 +133,8 @@
FunctionLiteral* MakeAST(bool compile_in_global_context,
Handle<Script> script,
v8::Extension* extension,
- ScriptDataImpl* pre_data);
+ ScriptDataImpl* pre_data,
+ bool is_json = false);
ScriptDataImpl* PreParse(Handle<String> source,
=======================================
--- /branches/bleeding_edge/src/scanner.cc Tue Nov 10 02:23:23 2009
+++ /branches/bleeding_edge/src/scanner.cc Mon Feb 1 02:31:55 2010
@@ -323,11 +323,14 @@
//
----------------------------------------------------------------------------
// Scanner
-Scanner::Scanner(bool pre) : stack_overflow_(false), is_pre_parsing_(pre)
{ }
+Scanner::Scanner(ParserMode pre)
+ : stack_overflow_(false), is_pre_parsing_(pre == PREPARSE) { }
-void Scanner::Init(Handle<String> source, unibrow::CharacterStream* stream,
- int position) {
+void Scanner::Init(Handle<String> source,
+ unibrow::CharacterStream* stream,
+ int position,
+ ParserLanguage language) {
// Initialize the source buffer.
if (!source.is_null() && StringShape(*source).IsExternalTwoByte()) {
two_byte_string_buffer_.Initialize(
@@ -339,6 +342,7 @@
}
position_ = position;
+ is_parsing_json_ = (language == JSON);
// Set c0_ (one character ahead)
ASSERT(kCharacterLookaheadBufferSize == 1);
@@ -416,8 +420,18 @@
}
-bool Scanner::SkipWhiteSpace() {
+bool Scanner::SkipJsonWhiteSpace() {
int start_position = source_pos();
+ // JSON WhiteSpace is tab, carriage-return, newline and space.
+ while (c0_ == ' ' || c0_ == '\n' || c0_ == '\r' || c0_ == '\t') {
+ Advance();
+ }
+ return source_pos() != start_position;
+}
+
+
+bool Scanner::SkipJavaScriptWhiteSpace() {
+ int start_position = source_pos();
while (true) {
// We treat byte-order marks (BOMs) as whitespace for better
@@ -512,7 +526,194 @@
}
-void Scanner::Scan() {
+
+void Scanner::ScanJson() {
+ next_.literal_buffer = NULL;
+ Token::Value token;
+ has_line_terminator_before_next_ = false;
+ do {
+ // Remember the position of the next token
+ next_.location.beg_pos = source_pos();
+ switch (c0_) {
+ case '\t':
+ case '\r':
+ case '\n':
+ case ' ':
+ Advance();
+ token = Token::WHITESPACE;
+ break;
+ case '{':
+ Advance();
+ token = Token::LBRACE;
+ break;
+ case '}':
+ Advance();
+ token = Token::RBRACE;
+ break;
+ case '[':
+ Advance();
+ token = Token::LBRACK;
+ break;
+ case ']':
+ Advance();
+ token = Token::RBRACK;
+ break;
+ case ':':
+ Advance();
+ token = Token::COLON;
+ break;
+ case ',':
+ Advance();
+ token = Token::COMMA;
+ break;
+ case '"':
+ token = ScanJsonString();
+ break;
+ case '-':
+ case '0':
+ case '1':
+ case '2':
+ case '3':
+ case '4':
+ case '5':
+ case '6':
+ case '7':
+ case '8':
+ case '9':
+ token = ScanJsonNumber();
+ break;
+ case 't':
+ token = ScanJsonIdentifier("true", Token::TRUE_LITERAL);
+ break;
+ case 'f':
+ token = ScanJsonIdentifier("false", Token::FALSE_LITERAL);
+ break;
+ case 'n':
+ token = ScanJsonIdentifier("null", Token::NULL_LITERAL);
+ break;
+ default:
+ if (c0_ < 0) {
+ Advance();
+ token = Token::EOS;
+ } else {
+ Advance();
+ token = Select(Token::ILLEGAL);
+ }
+ }
+ } while (token == Token::WHITESPACE);
+
+ next_.location.end_pos = source_pos();
+ next_.token = token;
+}
+
+
+Token::Value Scanner::ScanJsonString() {
+ ASSERT_EQ('"', c0_);
+ Advance();
+ StartLiteral();
+ while (c0_ != '"' && c0_ > 0) {
+ // Check for control character (0x00-0x1f) or unterminated string (<0).
+ if (c0_ < 0x20) return Token::ILLEGAL;
+ if (c0_ != '\\') {
+ AddCharAdvance();
+ } else {
+ Advance();
+ switch (c0_) {
+ case '"':
+ case '\\':
+ case '/':
+ AddChar(c0_);
+ break;
+ case 'b':
+ AddChar('\x08');
+ break;
+ case 'f':
+ AddChar('\x0c');
+ break;
+ case 'n':
+ AddChar('\x0a');
+ break;
+ case 'r':
+ AddChar('\x0d');
+ break;
+ case 't':
+ AddChar('\x09');
+ break;
+ case 'u': {
+ uc32 value = 0;
+ for (int i = 0; i < 4; i++) {
+ Advance();
+ int digit = HexValue(c0_);
+ if (digit < 0) return Token::ILLEGAL;
+ value = value * 16 + digit;
+ }
+ AddChar(value);
+ break;
+ }
+ default:
+ return Token::ILLEGAL;
+ }
+ Advance();
+ }
+ }
+ if (c0_ != '"') {
+ return Token::ILLEGAL;
+ }
+ TerminateLiteral();
+ Advance();
+ return Token::STRING;
+}
+
+
+Token::Value Scanner::ScanJsonNumber() {
+ StartLiteral();
+ if (c0_ == '-') AddCharAdvance();
+ if (c0_ == '0') {
+ AddCharAdvance();
+ // Prefix zero is only allowed if it's the only digit before
+ // a decimal point or exponent.
+ if ('0' <= c0_ && c0_ <= '9') return Token::ILLEGAL;
+ } else {
+ if (c0_ < '1' || c0_ > '9') return Token::ILLEGAL;
+ do {
+ AddCharAdvance();
+ } while (c0_ >= '0' && c0_ <= '9');
+ }
+ if (c0_ == '.') {
+ AddCharAdvance();
+ if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
+ do {
+ AddCharAdvance();
+ } while (c0_ >= '0' && c0_ <= '9');
+ }
+ if ((c0_ | 0x20) == 'e') {
+ AddCharAdvance();
+ if (c0_ == '-' || c0_ == '+') AddCharAdvance();
+ if (c0_ < '0' || c0_ > '9') return Token::ILLEGAL;
+ do {
+ AddCharAdvance();
+ } while (c0_ >= '0' && c0_ <= '9');
+ }
+ TerminateLiteral();
+ return Token::NUMBER;
+}
+
+
+Token::Value Scanner::ScanJsonIdentifier(const char* text,
+ Token::Value token) {
+ StartLiteral();
+ while (*text != '\0') {
+ if (c0_ != *text) return Token::ILLEGAL;
+ Advance();
+ text++;
+ }
+ if (kIsIdentifierPart.get(c0_)) return Token::ILLEGAL;
+ TerminateLiteral();
+ return token;
+}
+
+
+void Scanner::ScanJavaScript() {
next_.literal_buffer = NULL;
Token::Value token;
has_line_terminator_before_next_ = false;
=======================================
--- /branches/bleeding_edge/src/scanner.h Wed Nov 11 01:50:06 2009
+++ /branches/bleeding_edge/src/scanner.h Mon Feb 1 02:31:55 2010
@@ -252,18 +252,22 @@
};
+enum ParserMode { PARSE, PREPARSE };
+enum ParserLanguage { JAVASCRIPT, JSON };
+
+
class Scanner {
public:
-
typedef unibrow::Utf8InputBuffer<1024> Utf8Decoder;
// Construction
- explicit Scanner(bool is_pre_parsing);
+ explicit Scanner(ParserMode parse_mode);
// Initialize the Scanner to scan source:
void Init(Handle<String> source,
unibrow::CharacterStream* stream,
- int position);
+ int position,
+ ParserLanguage language);
// Returns the next token.
Token::Value Next();
@@ -377,6 +381,7 @@
TokenDesc next_; // desc for next token (one token look-ahead)
bool has_line_terminator_before_next_;
bool is_pre_parsing_;
+ bool is_parsing_json_;
// Literal buffer support
void StartLiteral();
@@ -391,14 +396,57 @@
c0_ = ch;
}
- bool SkipWhiteSpace();
+ bool SkipWhiteSpace() {
+ if (is_parsing_json_) {
+ return SkipJsonWhiteSpace();
+ } else {
+ return SkipJavaScriptWhiteSpace();
+ }
+ }
+ bool SkipJavaScriptWhiteSpace();
+ bool SkipJsonWhiteSpace();
Token::Value SkipSingleLineComment();
Token::Value SkipMultiLineComment();
inline Token::Value Select(Token::Value tok);
inline Token::Value Select(uc32 next, Token::Value then, Token::Value
else_);
- void Scan();
+ inline void Scan() {
+ if (is_parsing_json_) {
+ ScanJson();
+ } else {
+ ScanJavaScript();
+ }
+ }
+
+ // Scans a single JavaScript token.
+ void ScanJavaScript();
+
+ // Scan a single JSON token. The JSON lexical grammar is specified in the
+ // ECMAScript 5 standard, section 15.12.1.1.
+ // Recognizes all of the single-character tokens directly, or calls a
function
+ // to scan a number, string or identifier literal.
+ // The only allowed whitespace characters between tokens are tab,
+ // carriage-return, newline and space.
+ void ScanJson();
+
+ // A JSON number (production JSONNumber) is a subset of the valid
JavaScript
+ // decimal number literals.
+ // It includes an optional minus sign, must have at least one
+ // digit before and after a decimal point, may not have prefixed zeros
(unless
+ // the integer part is zero), and may include an exponent part
(e.g., "e-10").
+ // Hexadecimal and octal numbers are not allowed.
+ Token::Value ScanJsonNumber();
+ // A JSON string (production JSONString) is a subset of valid JavaScript string
+ // literals. The string must only be double-quoted (not single-quoted),
and
+ // the only allowed backslash-escapes are ", /, \, b, f, n, r, t and
+ // four-digit hex escapes (uXXXX). Any other use of backslashes is
invalid.
+ Token::Value ScanJsonString();
+ // Used to recognize one of the literals "true", "false", or "null". These
+ // are the only valid JSON identifiers (productions JSONBooleanLiteral,
+ // JSONNullLiteral).
+ Token::Value ScanJsonIdentifier(const char* text, Token::Value token);
+
void ScanDecimalDigits();
Token::Value ScanNumber(bool seen_period);
Token::Value ScanIdentifier();
=======================================
--- /branches/bleeding_edge/test/mjsunit/debug-compile-event.js Fri Oct 2
05:47:15 2009
+++ /branches/bleeding_edge/test/mjsunit/debug-compile-event.js Mon Feb 1
02:31:55 2010
@@ -107,7 +107,7 @@
source_count++; // Using eval causes additional compilation event.
compileSource('eval("eval(\'(function(){return a;})\')")');
source_count += 2; // Using eval causes additional compilation event.
-compileSource('JSON.parse("{a:1,b:2}")');
+compileSource('JSON.parse(\'{"a":1,"b":2}\')');
source_count++; // Using JSON.parse causes additional compilation event.
// Make sure that the debug event listener was invoked.
=======================================
--- /branches/bleeding_edge/test/mjsunit/json.js Thu Jan 14 01:05:52 2010
+++ /branches/bleeding_edge/test/mjsunit/json.js Mon Feb 1 02:31:55 2010
@@ -93,20 +93,46 @@
assertFalse(p == "JSON");
// Parse
-
assertEquals({}, JSON.parse("{}"));
+assertEquals({42:37}, JSON.parse('{"42":37}'));
assertEquals(null, JSON.parse("null"));
assertEquals(true, JSON.parse("true"));
assertEquals(false, JSON.parse("false"));
assertEquals("foo", JSON.parse('"foo"'));
assertEquals("f\no", JSON.parse('"f\\no"'));
+assertEquals("\b\f\n\r\t\"\u2028\/\\",
+ JSON.parse('"\\b\\f\\n\\r\\t\\"\\u2028\\/\\\\"'));
+assertEquals([1.1], JSON.parse("[1.1]"));
+assertEquals([1], JSON.parse("[1.0]"));
+
+assertEquals(0, JSON.parse("0"));
+assertEquals(1, JSON.parse("1"));
+assertEquals(0.1, JSON.parse("0.1"));
assertEquals(1.1, JSON.parse("1.1"));
-assertEquals(1, JSON.parse("1.0"));
-assertEquals(0.0000000003, JSON.parse("3e-10"));
+assertEquals(1.1, JSON.parse("1.100000"));
+assertEquals(1.111111, JSON.parse("1.111111"));
+assertEquals(-0, JSON.parse("-0"));
+assertEquals(-1, JSON.parse("-1"));
+assertEquals(-0.1, JSON.parse("-0.1"));
+assertEquals(-1.1, JSON.parse("-1.1"));
+assertEquals(-1.1, JSON.parse("-1.100000"));
+assertEquals(-1.111111, JSON.parse("-1.111111"));
+assertEquals(11, JSON.parse("1.1e1"));
+assertEquals(11, JSON.parse("1.1e+1"));
+assertEquals(0.11, JSON.parse("1.1e-1"));
+assertEquals(11, JSON.parse("1.1E1"));
+assertEquals(11, JSON.parse("1.1E+1"));
+assertEquals(0.11, JSON.parse("1.1E-1"));
+
assertEquals([], JSON.parse("[]"));
assertEquals([1], JSON.parse("[1]"));
assertEquals([1, "2", true, null], JSON.parse('[1, "2", true, null]'));
+assertEquals("", JSON.parse('""'));
+assertEquals(["", "", -0, ""], JSON.parse('[ "" , "" ,
-0, ""]'));
+assertEquals("", JSON.parse('""'));
+
+
function GetFilter(name) {
function Filter(key, value) {
return (key == name) ? undefined : value;
@@ -145,6 +171,64 @@
TestInvalid("[1, 2");
TestInvalid('{"x": 3');
+// JavaScript number literals not valid in JSON.
+TestInvalid('[01]');
+TestInvalid('[.1]');
+TestInvalid('[1.]');
+TestInvalid('[1.e1]');
+TestInvalid('[-.1]');
+TestInvalid('[-1.]');
+
+// Plain invalid number literals.
+TestInvalid('-');
+TestInvalid('--1');
+TestInvalid('-1e');
+TestInvalid('1e--1]');
+TestInvalid('1e+-1');
+TestInvalid('1e-+1');
+TestInvalid('1e++1');
+
+// JavaScript string literals not valid in JSON.
+TestInvalid("'single quote'"); // Valid JavaScript
+TestInvalid('"\\a invalid escape"');
+TestInvalid('"\\v invalid escape"'); // Valid JavaScript
+TestInvalid('"\\\' invalid escape"'); // Valid JavaScript
+TestInvalid('"\\x42 invalid escape"'); // Valid JavaScript
+TestInvalid('"\\u202 invalid escape"');
+TestInvalid('"\\012 invalid escape"');
+TestInvalid('"Unterminated string');
+TestInvalid('"Unterminated string\\"');
+TestInvalid('"Unterminated string\\\\\\"');
+
+// Test bad JSON that would be good JavaScript (ES5).
+
+TestInvalid("{true:42}");
+TestInvalid("{false:42}");
+TestInvalid("{null:42}");
+TestInvalid("{'foo':42}");
+TestInvalid("{42:42}");
+TestInvalid("{0:42}");
+TestInvalid("{-1:42}");
+
+// Test for trailing garbage detection.
+
+TestInvalid('42 px');
+TestInvalid('42 .2');
+TestInvalid('42 2');
+TestInvalid('42 e1');
+TestInvalid('"42" ""');
+TestInvalid('"42" ""');
+TestInvalid('"" ""');
+TestInvalid('true ""');
+TestInvalid('false ""');
+TestInvalid('null ""');
+TestInvalid('null ""');
+TestInvalid('[] ""');
+TestInvalid('[true] ""');
+TestInvalid('{} ""');
+TestInvalid('{"x":true} ""');
+TestInvalid('"Garbage""After string"');
+
// Stringify
assertEquals("true", JSON.stringify(true));
@@ -196,12 +280,8 @@
assertEquals(undefined, JSON.stringify(undefined));
assertEquals(undefined, JSON.stringify(function () { }));
-function checkIllegal(str) {
- assertThrows(function () { JSON.parse(str); }, SyntaxError);
-}
-
-checkIllegal('1); throw "foo"; (1');
+TestInvalid('1); throw "foo"; (1');
var x = 0;
eval("(1); x++; (1)");
-checkIllegal('1); x++; (1');
+TestInvalid('1); x++; (1');
=======================================
--- /branches/bleeding_edge/test/mjsunit/mirror-script.js Fri Oct 2
05:47:15 2009
+++ /branches/bleeding_edge/test/mjsunit/mirror-script.js Mon Feb 1
02:31:55 2010
@@ -87,8 +87,8 @@
testScriptMirror(Math.sin, 'native math.js', -1, 0, 0);
testScriptMirror(eval('(function(){})'), null, 1, 2, 1, '(function(){})',
87);
testScriptMirror(eval('(function(){\n })'), null, 2, 2,
1, '(function(){\n })', 88);
-testScriptMirror(%CompileString("({a:1,b:2})", true), null, 1, 2,
2, '({a:1,b:2})');
-testScriptMirror(%CompileString("({a:1,\n b:2})", true), null, 2, 2,
2, '({a:1,\n b:2})');
+testScriptMirror(%CompileString('{"a":1,"b":2}', true), null, 1, 2,
2, '{"a":1,"b":2}');
+testScriptMirror(%CompileString('{"a":1,\n "b":2}', true), null, 2, 2,
2, '{"a":1,\n "b":2}');
// Test taking slices of source.
var mirror = debug.MakeMirror(eval('(function(){\n 1;\n})')).script();
=======================================
--- /branches/bleeding_edge/test/mjsunit/mjsunit.js Thu Nov 5 08:08:48 2009
+++ /branches/bleeding_edge/test/mjsunit/mjsunit.js Mon Feb 1 02:31:55 2010
@@ -75,6 +75,7 @@
if (typeof a == "number" && typeof b == "number" && isNaN(a) &&
isNaN(b)) {
return true;
}
+ if (a == null || b == null) return false;
if (a.constructor === RegExp || b.constructor === RegExp) {
return (a.constructor === b.constructor) && (a.toString ===
b.toString);
}
--
v8-dev mailing list
[email protected]
http://groups.google.com/group/v8-dev