Diff
Modified: trunk/Source/JavaScriptCore/ChangeLog (87176 => 87177)
--- trunk/Source/JavaScriptCore/ChangeLog 2011-05-24 18:46:55 UTC (rev 87176)
+++ trunk/Source/JavaScriptCore/ChangeLog 2011-05-24 18:49:18 UTC (rev 87177)
@@ -1,3 +1,43 @@
+2011-05-24 Oliver Hunt <[email protected]>
+
+ Reviewed by Geoffrey Garen.
+
+ Avoid creating unnecessary identifiers and strings in the syntax checker
+ https://bugs.webkit.org/show_bug.cgi?id=61378
+
+ Selectively tell the lexer that there are some places it does not need to
+ do the real work of creating Identifiers for IDENT and STRING tokens.
+
+ Make parseString and parseIdentifier templatized on whether they should
+ do real work, or merely validate the tokens.
+
+ SunSpider --parse-only reports ~5-8% win depending on hardware.
+
+ * parser/ASTBuilder.h:
+ (JSC::ASTBuilder::createDotAccess):
+ * parser/JSParser.cpp:
+ (JSC::JSParser::next):
+ (JSC::JSParser::consume):
+ (JSC::JSParser::parseVarDeclarationList):
+ (JSC::JSParser::parseConstDeclarationList):
+ (JSC::JSParser::parseExpression):
+ (JSC::JSParser::parseAssignmentExpression):
+ (JSC::JSParser::parseConditionalExpression):
+ (JSC::JSParser::parseBinaryExpression):
+ (JSC::JSParser::parseProperty):
+ (JSC::JSParser::parseObjectLiteral):
+ (JSC::JSParser::parseArrayLiteral):
+ (JSC::JSParser::parseArguments):
+ (JSC::JSParser::parseMemberExpression):
+ * parser/Lexer.cpp:
+ (JSC::Lexer::parseIdentifier):
+ (JSC::Lexer::parseString):
+ (JSC::Lexer::lex):
+ * parser/Lexer.h:
+ * parser/SyntaxChecker.h:
+ (JSC::SyntaxChecker::createDotAccess):
+ (JSC::SyntaxChecker::createProperty):
+
2011-05-23 Michael Saboff <[email protected]>
Reviewed by Mark Rowe.
Modified: trunk/Source/JavaScriptCore/parser/ASTBuilder.h (87176 => 87177)
--- trunk/Source/JavaScriptCore/parser/ASTBuilder.h 2011-05-24 18:46:55 UTC (rev 87176)
+++ trunk/Source/JavaScriptCore/parser/ASTBuilder.h 2011-05-24 18:49:18 UTC (rev 87177)
@@ -109,6 +109,8 @@
static const bool CreatesAST = true;
static const bool NeedsFreeVariableInfo = true;
static const bool CanUseFunctionCache = true;
+ static const int DontBuildKeywords = 0;
+ static const int DontBuildStrings = 0;
ExpressionNode* makeBinaryNode(int token, std::pair<ExpressionNode*, BinaryOpInfo>, std::pair<ExpressionNode*, BinaryOpInfo>);
ExpressionNode* makeFunctionCallNode(ExpressionNode* func, ArgumentsNode* args, int start, int divot, int end);
@@ -209,9 +211,9 @@
return node;
}
- ExpressionNode* createDotAccess(ExpressionNode* base, const Identifier& property, int start, int divot, int end)
+ ExpressionNode* createDotAccess(ExpressionNode* base, const Identifier* property, int start, int divot, int end)
{
- DotAccessorNode* node = new (m_globalData) DotAccessorNode(m_globalData, base, property);
+ DotAccessorNode* node = new (m_globalData) DotAccessorNode(m_globalData, base, *property);
setExceptionLocation(node, start, divot, end);
return node;
}
Modified: trunk/Source/JavaScriptCore/parser/JSParser.cpp (87176 => 87177)
--- trunk/Source/JavaScriptCore/parser/JSParser.cpp 2011-05-24 18:46:55 UTC (rev 87176)
+++ trunk/Source/JavaScriptCore/parser/JSParser.cpp 2011-05-24 18:49:18 UTC (rev 87177)
@@ -49,6 +49,7 @@
#define failIfTrueIfStrict(cond) do { if ((cond) && strictMode()) fail(); } while (0)
#define failIfFalseIfStrict(cond) do { if ((!(cond)) && strictMode()) fail(); } while (0)
#define consumeOrFail(tokenType) do { if (!consume(tokenType)) fail(); } while (0)
+#define consumeOrFailWithFlags(tokenType, flags) do { if (!consume(tokenType, flags)) fail(); } while (0)
#define matchOrFail(tokenType) do { if (!match(tokenType)) fail(); } while (0)
#define failIfStackOverflow() do { failIfFalse(canRecurse()); } while (0)
@@ -98,7 +99,7 @@
bool m_isLoop;
};
- void next(Lexer::LexType lexType = Lexer::IdentifyReservedWords)
+ void next(unsigned lexType = 0)
{
m_lastLine = m_token.m_info.line;
m_lastTokenEnd = m_token.m_info.endOffset;
@@ -111,11 +112,11 @@
return m_lexer->nextTokenIsColon();
}
- bool consume(JSTokenType expected)
+ bool consume(JSTokenType expected, unsigned flags = 0)
{
bool result = m_token.m_type == expected;
failIfFalse(result);
- next();
+ next(flags);
return result;
}
@@ -796,7 +797,7 @@
if (hasInitializer) {
int varDivot = tokenStart() + 1;
initStart = tokenStart();
- next(); // consume '='
+ next(TreeBuilder::DontBuildStrings); // consume '='
int initialAssignments = m_assignmentCount;
TreeExpression initializer = parseAssignmentExpression(context);
initEnd = lastTokenEnd();
@@ -828,7 +829,7 @@
context.addVar(name, DeclarationStacks::IsConstant | (hasInitializer ? DeclarationStacks::HasInitializer : 0));
TreeExpression initializer = 0;
if (hasInitializer) {
- next(); // consume '='
+ next(TreeBuilder::DontBuildStrings); // consume '='
initializer = parseAssignmentExpression(context);
}
tail = context.appendConstDecl(tail, name, initializer);
@@ -1552,7 +1553,7 @@
failIfFalse(right);
typename TreeBuilder::Comma commaNode = context.createCommaExpr(node, right);
while (match(COMMA)) {
- next();
+ next(TreeBuilder::DontBuildStrings);
right = parseAssignmentExpression(context);
failIfFalse(right);
context.appendToComma(commaNode, right);
@@ -1597,7 +1598,7 @@
context.assignmentStackAppend(assignmentStack, lhs, start, tokenStart(), m_assignmentCount, op);
start = tokenStart();
m_assignmentCount++;
- next();
+ next(TreeBuilder::DontBuildStrings);
if (strictMode() && m_lastIdentifier && context.isResolve(lhs)) {
failIfTrueIfStrict(m_globalData->propertyNames->eval == *m_lastIdentifier);
failIfTrueIfStrict(m_globalData->propertyNames->arguments == *m_lastIdentifier);
@@ -1630,9 +1631,9 @@
return cond;
m_nonTrivialExpressionCount++;
m_nonLHSCount++;
- next();
+ next(TreeBuilder::DontBuildStrings);
TreeExpression lhs = parseAssignmentExpression(context);
- consumeOrFail(COLON);
+ consumeOrFailWithFlags(COLON, TreeBuilder::DontBuildStrings);
TreeExpression rhs = parseAssignmentExpression(context);
failIfFalse(rhs);
@@ -1670,7 +1671,7 @@
m_nonTrivialExpressionCount++;
m_nonLHSCount++;
int operatorToken = m_token.m_type;
- next();
+ next(TreeBuilder::DontBuildStrings);
while (operatorStackDepth && context.operatorStackHasHigherPrecedence(operatorStackDepth, precedence)) {
ASSERT(operandStackDepth > 1);
@@ -1705,7 +1706,11 @@
wasIdent = true;
case STRING: {
const Identifier* ident = m_token.m_data.ident;
- next(Lexer::IgnoreReservedWords);
+ if (complete || (wasIdent && (*ident == m_globalData->propertyNames->get || *ident == m_globalData->propertyNames->set)))
+ next(Lexer::IgnoreReservedWords);
+ else
+ next(Lexer::IgnoreReservedWords | TreeBuilder::DontBuildKeywords);
+
if (match(COLON)) {
next();
TreeExpression node = parseAssignmentExpression(context);
@@ -1747,7 +1752,7 @@
template <class TreeBuilder> TreeExpression JSParser::parseObjectLiteral(TreeBuilder& context)
{
int startOffset = m_token.m_data.intValue;
- consumeOrFail(OPENBRACE);
+ consumeOrFailWithFlags(OPENBRACE, TreeBuilder::DontBuildStrings);
if (match(CLOSEBRACE)) {
next();
@@ -1764,7 +1769,7 @@
TreePropertyList propertyList = context.createPropertyList(property);
TreePropertyList tail = propertyList;
while (match(COMMA)) {
- next();
+ next(TreeBuilder::DontBuildStrings);
// allow extra comma, see http://bugs.webkit.org/show_bug.cgi?id=5939
if (match(CLOSEBRACE))
break;
@@ -1829,15 +1834,15 @@
template <class TreeBuilder> TreeExpression JSParser::parseArrayLiteral(TreeBuilder& context)
{
- consumeOrFail(OPENBRACKET);
+ consumeOrFailWithFlags(OPENBRACKET, TreeBuilder::DontBuildStrings);
int elisions = 0;
while (match(COMMA)) {
- next();
+ next(TreeBuilder::DontBuildStrings);
elisions++;
}
if (match(CLOSEBRACKET)) {
- next();
+ next(TreeBuilder::DontBuildStrings);
return context.createArray(elisions);
}
@@ -1847,7 +1852,7 @@
typename TreeBuilder::ElementList tail = elementList;
elisions = 0;
while (match(COMMA)) {
- next();
+ next(TreeBuilder::DontBuildStrings);
elisions = 0;
while (match(COMMA)) {
@@ -1856,7 +1861,7 @@
}
if (match(CLOSEBRACKET)) {
- next();
+ next(TreeBuilder::DontBuildStrings);
return context.createArray(elisions, elementList);
}
TreeExpression elem = parseAssignmentExpression(context);
@@ -1948,9 +1953,9 @@
template <class TreeBuilder> TreeArguments JSParser::parseArguments(TreeBuilder& context)
{
- consumeOrFail(OPENPAREN);
+ consumeOrFailWithFlags(OPENPAREN, TreeBuilder::DontBuildStrings);
if (match(CLOSEPAREN)) {
- next();
+ next(TreeBuilder::DontBuildStrings);
return context.createArguments();
}
TreeExpression firstArg = parseAssignmentExpression(context);
@@ -1959,7 +1964,7 @@
TreeArgumentsList argList = context.createArgumentsList(firstArg);
TreeArgumentsList tail = argList;
while (match(COMMA)) {
- next();
+ next(TreeBuilder::DontBuildStrings);
TreeExpression arg = parseAssignmentExpression(context);
failIfFalse(arg);
tail = context.createArgumentsList(tail, arg);
@@ -2033,9 +2038,9 @@
case DOT: {
m_nonTrivialExpressionCount++;
int expressionEnd = lastTokenEnd();
- next(Lexer::IgnoreReservedWords);
+ next(Lexer::IgnoreReservedWords | TreeBuilder::DontBuildKeywords);
matchOrFail(IDENT);
- base = context.createDotAccess(base, *m_token.m_data.ident, expressionStart, expressionEnd, tokenEnd());
+ base = context.createDotAccess(base, m_token.m_data.ident, expressionStart, expressionEnd, tokenEnd());
next();
break;
}
Modified: trunk/Source/JavaScriptCore/parser/Lexer.cpp (87176 => 87177)
--- trunk/Source/JavaScriptCore/parser/Lexer.cpp 2011-05-24 18:46:55 UTC (rev 87176)
+++ trunk/Source/JavaScriptCore/parser/Lexer.cpp 2011-05-24 18:49:18 UTC (rev 87177)
@@ -399,7 +399,7 @@
record16(UChar(static_cast<unsigned short>(c)));
}
-ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* lvalp, LexType lexType)
+template <bool shouldCreateIdentifier> ALWAYS_INLINE JSTokenType Lexer::parseIdentifier(JSTokenData* lvalp, unsigned lexType)
{
bool bufferRequired = false;
const UChar* identifierStart = currentCharacter();
@@ -426,24 +426,31 @@
return ERRORTOK;
if (UNLIKELY(m_buffer16.size() ? !isIdentPart(character) : !isIdentStart(character)))
return ERRORTOK;
- record16(character);
+ if (shouldCreateIdentifier)
+ record16(character);
identifierStart = currentCharacter();
}
- if (!bufferRequired)
- identifierLength = currentCharacter() - identifierStart;
- else {
- if (identifierStart != currentCharacter())
- m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
- identifierStart = m_buffer16.data();
- identifierLength = m_buffer16.size();
- }
+ const Identifier* ident = 0;
+ if (shouldCreateIdentifier) {
+ if (!bufferRequired)
+ identifierLength = currentCharacter() - identifierStart;
+ else {
+ if (identifierStart != currentCharacter())
+ m_buffer16.append(identifierStart, currentCharacter() - identifierStart);
+ identifierStart = m_buffer16.data();
+ identifierLength = m_buffer16.size();
+ }
- const Identifier* ident = makeIdentifier(identifierStart, identifierLength);
- lvalp->ident = ident;
+ ident = makeIdentifier(identifierStart, identifierLength);
+ lvalp->ident = ident;
+ } else
+ lvalp->ident = 0;
+
m_delimited = false;
- if (LIKELY(!bufferRequired && lexType == IdentifyReservedWords)) {
+ if (LIKELY(!bufferRequired && !(lexType & IgnoreReservedWords))) {
+ ASSERT(shouldCreateIdentifier);
// Keywords must not be recognized if there was an \uXXXX in the identifier.
const HashEntry* entry = m_keywordTable.entry(m_globalData, *ident);
return entry ? static_cast<JSTokenType>(entry->lexerValue()) : IDENT;
@@ -453,7 +460,7 @@
return IDENT;
}
-ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
+template <bool shouldBuildStrings> ALWAYS_INLINE bool Lexer::parseString(JSTokenData* lvalp, bool strictMode)
{
int stringQuoteCharacter = m_current;
shift();
@@ -462,7 +469,7 @@
while (m_current != stringQuoteCharacter) {
if (UNLIKELY(m_current == '\\')) {
- if (stringStart != currentCharacter())
+ if (stringStart != currentCharacter() && shouldBuildStrings)
m_buffer16.append(stringStart, currentCharacter() - stringStart);
shift();
@@ -470,7 +477,8 @@
// Most common escape sequences first
if (escape) {
- record16(escape);
+ if (shouldBuildStrings)
+ record16(escape);
shift();
} else if (UNLIKELY(isLineTerminator(m_current)))
shiftLineTerminator();
@@ -479,18 +487,21 @@
if (isASCIIHexDigit(m_current) && isASCIIHexDigit(peek(1))) {
int prev = m_current;
shift();
- record16(convertHex(prev, m_current));
+ if (shouldBuildStrings)
+ record16(convertHex(prev, m_current));
shift();
- } else
+ } else if (shouldBuildStrings)
record16('x');
} else if (m_current == 'u') {
shift();
int character = getUnicodeCharacter();
- if (character != -1)
- record16(character);
- else if (m_current == stringQuoteCharacter)
- record16('u');
- else // Only stringQuoteCharacter allowed after \u
+ if (character != -1) {
+ if (shouldBuildStrings)
+ record16(character);
+ } else if (m_current == stringQuoteCharacter) {
+ if (shouldBuildStrings)
+ record16('u');
+ } else // Only stringQuoteCharacter allowed after \u
return false;
} else if (strictMode && isASCIIDigit(m_current)) {
// The only valid numeric escape in strict mode is '\0', and this must not be followed by a decimal digit.
@@ -498,7 +509,8 @@
shift();
if (character1 != '0' || isASCIIDigit(m_current))
return false;
- record16(0);
+ if (shouldBuildStrings)
+ record16(0);
} else if (!strictMode && isASCIIOctalDigit(m_current)) {
// Octal character sequences
int character1 = m_current;
@@ -508,14 +520,20 @@
int character2 = m_current;
shift();
if (character1 >= '0' && character1 <= '3' && isASCIIOctalDigit(m_current)) {
- record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
+ if (shouldBuildStrings)
+ record16((character1 - '0') * 64 + (character2 - '0') * 8 + m_current - '0');
shift();
- } else
- record16((character1 - '0') * 8 + character2 - '0');
- } else
- record16(character1 - '0');
+ } else {
+ if (shouldBuildStrings)
+ record16((character1 - '0') * 8 + character2 - '0');
+ }
+ } else {
+ if (shouldBuildStrings)
+ record16(character1 - '0');
+ }
} else if (m_current != -1) {
- record16(m_current);
+ if (shouldBuildStrings)
+ record16(m_current);
shift();
} else
return false;
@@ -535,9 +553,13 @@
shift();
}
- if (currentCharacter() != stringStart)
+ if (currentCharacter() != stringStart && shouldBuildStrings)
m_buffer16.append(stringStart, currentCharacter() - stringStart);
- lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+ if (shouldBuildStrings)
+ lvalp->ident = makeIdentifier(m_buffer16.data(), m_buffer16.size());
+ else
+ lvalp->ident = 0;
+
m_buffer16.resize(0);
return true;
}
@@ -712,7 +734,7 @@
return code < m_codeEnd && *code == ':';
}
-JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType lexType, bool strictMode)
+JSTokenType Lexer::lex(JSTokenData* lvalp, JSTokenInfo* llocp, unsigned lexType, bool strictMode)
{
ASSERT(!m_error);
ASSERT(m_buffer8.isEmpty());
@@ -1025,8 +1047,13 @@
m_delimited = false;
break;
case CharacterQuote:
- if (UNLIKELY(!parseString(lvalp, strictMode)))
- goto returnError;
+ if (lexType & DontBuildStrings) {
+ if (UNLIKELY(!parseString<false>(lvalp, strictMode)))
+ goto returnError;
+ } else {
+ if (UNLIKELY(!parseString<true>(lvalp, strictMode)))
+ goto returnError;
+ }
shift();
m_delimited = false;
token = STRING;
@@ -1035,7 +1062,10 @@
ASSERT(isIdentStart(m_current));
// Fall through into CharacterBackSlash.
case CharacterBackSlash:
- token = parseIdentifier(lvalp, lexType);
+ if (lexType & DontBuildKeywords)
+ token = parseIdentifier<false>(lvalp, lexType);
+ else
+ token = parseIdentifier<true>(lvalp, lexType);
break;
case CharacterLineTerminator:
ASSERT(isLineTerminator(m_current));
Modified: trunk/Source/JavaScriptCore/parser/Lexer.h (87176 => 87177)
--- trunk/Source/JavaScriptCore/parser/Lexer.h 2011-05-24 18:46:55 UTC (rev 87176)
+++ trunk/Source/JavaScriptCore/parser/Lexer.h 2011-05-24 18:49:18 UTC (rev 87177)
@@ -52,8 +52,12 @@
bool isReparsing() const { return m_isReparsing; }
// Functions for the parser itself.
- enum LexType { IdentifyReservedWords, IgnoreReservedWords };
- JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp, LexType, bool strictMode);
+ enum LexType {
+ IgnoreReservedWords = 1,
+ DontBuildStrings = 2,
+ DontBuildKeywords = 4
+ };
+ JSTokenType lex(JSTokenData* lvalp, JSTokenInfo* llocp, unsigned, bool strictMode);
bool nextTokenIsColon();
int lineNumber() const { return m_lineNumber; }
void setLastLineNumber(int lastLineNumber) { m_lastLineNumber = lastLineNumber; }
@@ -109,8 +113,8 @@
ALWAYS_INLINE bool lastTokenWasRestrKeyword() const;
- ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, LexType);
- ALWAYS_INLINE bool parseString(JSTokenData* lvalp, bool strictMode);
+ template <bool shouldBuildIdentifiers> ALWAYS_INLINE JSTokenType parseIdentifier(JSTokenData*, unsigned);
+ template <bool shouldBuildStrings> ALWAYS_INLINE bool parseString(JSTokenData* lvalp, bool strictMode);
ALWAYS_INLINE void parseHex(double& returnValue);
ALWAYS_INLINE bool parseOctal(double& returnValue);
ALWAYS_INLINE bool parseDecimal(double& returnValue);
Modified: trunk/Source/JavaScriptCore/parser/SyntaxChecker.h (87176 => 87177)
--- trunk/Source/JavaScriptCore/parser/SyntaxChecker.h 2011-05-24 18:46:55 UTC (rev 87176)
+++ trunk/Source/JavaScriptCore/parser/SyntaxChecker.h 2011-05-24 18:49:18 UTC (rev 87177)
@@ -26,6 +26,7 @@
#ifndef SyntaxChecker_h
#define SyntaxChecker_h
+#include "Lexer.h"
#include <yarr/YarrSyntaxChecker.h>
namespace JSC {
@@ -113,6 +114,8 @@
static const bool CreatesAST = false;
static const bool NeedsFreeVariableInfo = false;
static const bool CanUseFunctionCache = true;
+ static const unsigned DontBuildKeywords = Lexer::DontBuildKeywords;
+ static const unsigned DontBuildStrings = Lexer::DontBuildStrings;
int createSourceElements() { return 1; }
ExpressionType makeFunctionCallNode(int, int, int, int, int) { return CallExpr; }
@@ -139,7 +142,7 @@
ExpressionType createBoolean(bool) { return BoolExpr; }
ExpressionType createNull() { return NullExpr; }
ExpressionType createBracketAccess(ExpressionType, ExpressionType, bool, int, int, int) { return BracketExpr; }
- ExpressionType createDotAccess(ExpressionType, const Identifier&, int, int, int) { return DotExpr; }
+ ExpressionType createDotAccess(ExpressionType, const Identifier*, int, int, int) { return DotExpr; }
ExpressionType createRegExp(const Identifier& pattern, const Identifier&, int) { return Yarr::checkSyntax(pattern.ustring()) ? 0 : RegExpExpr; }
ExpressionType createNewExpr(ExpressionType, int, int, int, int) { return NewExpr; }
ExpressionType createNewExpr(ExpressionType, int, int) { return NewExpr; }
@@ -153,9 +156,9 @@
int createArgumentsList(int, int) { return 1; }
template <bool complete> Property createProperty(const Identifier* name, int, PropertyNode::Type type)
{
- ASSERT(name);
if (!complete)
return Property(type);
+ ASSERT(name);
return Property(name, type);
}
template <bool complete> Property createProperty(JSGlobalData* globalData, double name, int, PropertyNode::Type type)