# New Ticket Created by
# Please include the string: [perl #124333]
# in the subject line of all future correspondence about this issue.
# <URL: https://rt.perl.org/Ticket/Display.html?id=124333 >
OS: Ubuntu 14.04 LTS under VirtualBox
Host: Windows 8, Intel Core i5
Rakudo: This is perl6 version 2015.03-317-g37ec24f built on MoarVM
version 2015.03-133-ga300558
The attached file is a radically-trimmed ECMAScript grammar which
segfaults the p6 compiler routinely. The original is at
https://github.com/drforr/perl6-ecmascript if you would like to see it.
By 'radically trimmed' I mean there's little there that resembles the
original grammar. I suspect the root cause of the problem is the
UnicodeLetter token. The original grammar is from the ANTLR repository,
and ANTLR doesn't have a way to represent Unicode properties. Otherwise
they would have used the moral equivalent of \p{Letter} instead of the
huge set of alternations.
As it stands, run the enclosed file and the p6 compiler segfaults. Alter
anything, including removing a test (even those that fail, and there are
a lot of them) and the segfault goes away. It's taken me the better part
of a day to trim the example down to this point, and it's now past the
point of diminishing returns for me to continue to work on it. Basically,
it's waiting for someone who can debug it in a different way than I've
been doing. My instinct says it's Unicode-related, but I'd also look at
the unaryExpression term. There are a few points of left-recursion in
the grammar and those are always suspect.
use v6;
# Cut-down ECMAScript grammar used as a compiler crash reproduction (see the
# report text at the top of this file). The report states that altering the
# file can make the segfault disappear, so change it with care.
#
# `rule` declarations imply :ratchet and :sigspace (whitespace written in the
# pattern is significant); `token` declarations imply :ratchet only.
#
# Only part of the grammar is reachable from TOP. The remaining rules are
# entered directly by passing their name as the `rule` argument to parse.
grammar Grammar::ECMAScript {
# Default start rule: one or more expression statements.
rule TOP { <sourceElements> }
rule sourceElements { <expressionStatement>+ }
# Function forms. The only difference between the two is that the name is
# optional in functionExpression.
rule functionDeclaration {
'function' <Identifier> <formalParameterList> <functionBody>
}
rule functionExpression {
'function' <Identifier>? <formalParameterList> <functionBody>
}
# Parenthesised run of identifiers; no separator is written between them.
rule formalParameterList { '(' <Identifier>* ')' }
rule functionBody { '{' <sourceElements> '}' }
# In this trimmed grammar a statement list is a single expression statement.
rule statementList { <expressionStatement> }
# Variable declarations: comma-separated, each with an optional '=' initialiser.
rule variableDeclarationList { <variableDeclaration>+ % ',' }
rule variableDeclaration { <Identifier> <initialiser>? }
rule variableDeclarationNoIn { <Identifier> <initialiserNoIn>? }
rule initialiser { '=' <assignmentExpression> }
rule initialiserNoIn { '=' <assignmentExpressionNoIn> }
# Statement forms. expressionStatement is the only one used by TOP.
rule expressionStatement { <expression> ';' }
rule whileStatement { 'while' '(' <expression> ')' <expressionStatement> }
rule forStatementInitialiserPart { <expressionNoIn> }
# Matches only the parenthesised header; no loop body follows the ')'.
rule forInStatement {
'for' '(' <leftHandSideExpression> 'in' <expression> ')'
}
rule forInStatementInitialiserPart { <leftHandSideExpression> }
rule returnStatement { 'return' <expression>? ';' }
rule withStatement { 'with' '(' <expression> ')' <expressionStatement> }
rule labelledStatement { <Identifier> ':' <expressionStatement> }
# Unlike returnStatement, no trailing ';' is matched here.
rule throwStatement { 'throw' <expression> }
# Expressions: comma-separated lists of assignment expressions.
rule expression { <assignmentExpression>+ % ',' }
rule expressionNoIn { <assignmentExpressionNoIn>+ % ',' }
# Either a conditional expression, or a left-hand side followed by '='.
# Nothing is parsed after the '=' in the second alternative.
rule assignmentExpression {
<conditionalExpression> | <leftHandSideExpression> '='
}
rule assignmentExpressionNoIn { <conditionalExpressionNoIn> }
# A primary expression, with or without a following argument list.
rule leftHandSideExpression {
<primaryExpression> <arguments> | <primaryExpression>
}
rule arguments { '(' <assignmentExpression>* % ',' ')' }
rule indexSuffix { '[' <expression> ']' }
rule propertyReferenceSuffix { '.' <Identifier> }
# The precedence chain below contains no binary operators: each level simply
# delegates to the next, and shiftExpression accepts one or more adjacent
# multiplicativeExpressions.
rule conditionalExpression { <relationalExpression> }
rule relationalExpression { <shiftExpression> }
rule shiftExpression { <multiplicativeExpression>+ }
rule multiplicativeExpression { <unaryExpression> }
rule conditionalExpressionNoIn { <logicalORExpressionNoIn> }
rule logicalORExpressionNoIn { <relationalExpressionNoIn> }
rule relationalExpressionNoIn { <shiftExpression> }
# A postfix expression, or one prefix operator followed by another
# unaryExpression (recursion on the right-hand side).
rule unaryExpression {
<postfixExpression>
| [ 'delete' | 'void' | 'typeof' | '++' | '--' | '+' | '-' | '~' | '!' ]
<unaryExpression>
}
rule postfixExpression { <leftHandSideExpression> [ '++' | '--' ]? }
# An identifier, a literal, or a parenthesised expression.
rule primaryExpression {
| <Identifier>
| <literal>
| '(' <expression> ')'
}
# An optional '.' followed by exactly one ASCII digit.
token literal { '.'? <[0..9]> }
# One or more UnicodeLetter characters; no digits, '$' or '_' are accepted.
token Identifier { <UnicodeLetter>+ }
# A single letter, spelled out as an alternation of explicit code point ranges
# followed by individual code points. Per the report, the list comes from an
# ANTLR grammar, which could not express Unicode properties.
# NOTE(review): the isolated code points 3400/4DB5, 4E00/9FA5 and AC00/D7A3
# look like the endpoints of ranges (CJK Extension A, CJK Unified Ideographs,
# Hangul Syllables) rather than standalone letters - confirm against the
# upstream grammar.
token UnicodeLetter {
<[\x[0041]..\x[005A]]>
| <[\x[0061]..\x[007A]]>
| <[\x[0388]..\x[038A]]>
| <[\x[038E]..\x[03A1]]>
| <[\x[03A3]..\x[03CE]]>
| <[\x[03D0]..\x[03D7]]>
| <[\x[03DA]..\x[03F3]]>
| <[\x[0400]..\x[0481]]>
| <[\x[048C]..\x[04C4]]>
| <[\x[04C7]..\x[04C8]]>
| <[\x[04CB]..\x[04CC]]>
| <[\x[04D0]..\x[04F5]]>
| <[\x[04F8]..\x[04F9]]>
| <[\x[0531]..\x[0556]]>
| <[\x[06E5]..\x[06E6]]>
| <[\x[06FA]..\x[06FC]]>
| <[\x[1312]..\x[1315]]>
| <[\x[1318]..\x[131E]]>
| <[\x[1320]..\x[1346]]>
| <[\x[1348]..\x[135A]]>
| <[\x[13A0]..\x[13B0]]>
| <[\x[13B1]..\x[13F4]]>
| <[\x[1401]..\x[1676]]>
| <[\x[1681]..\x[169A]]>
| <[\x[16A0]..\x[16EA]]>
| <[\x[1780]..\x[17B3]]>
| <[\x[1820]..\x[1877]]>
| <[\x[1880]..\x[18A8]]>
| <[\x[1E00]..\x[1E9B]]>
| <[\x[1EA0]..\x[1EE0]]>
| <[\x[1EE1]..\x[1EF9]]>
| <[\x[1F00]..\x[1F15]]>
| <[\x[1F18]..\x[1F1D]]>
| <[\x[1F20]..\x[1F39]]>
| <[\x[1F3A]..\x[1F45]]>
| <[\x[1F48]..\x[1F4D]]>
| <[\x[1F50]..\x[1F57]]>
| <[\x[210A]..\x[2113]]>
| <[\x[2119]..\x[211D]]>
| <[\x[212A]..\x[212D]]>
| <[\x[212F]..\x[2131]]>
| <[\x[2133]..\x[2139]]>
| <[\x[2160]..\x[2183]]>
| <[\x[3005]..\x[3007]]>
| <[\x[3021]..\x[3029]]>
| <[\x[3031]..\x[3035]]>
| <[\x[3038]..\x[303A]]>
| <[\x[3041]..\x[3094]]>
| <[\x[309D]..\x[309E]]>
| <[\x[30A1]..\x[30FA]]>
| <[\x[30FC]..\x[30FE]]>
| <[\x[3105]..\x[312C]]>
| <[\x[3131]..\x[318E]]>
| <[\x[31A0]..\x[31B7]]>
| <[\x[A000]..\x[A48C]]>
| <[\x[F900]..\x[FA2D]]>
| <[\x[FB00]..\x[FB06]]>
| <[\x[FB13]..\x[FB17]]>
| <[\x[FB1F]..\x[FB28]]>
| <[\x[FB2A]..\x[FB36]]>
| <[\x[FB38]..\x[FB3C]]>
| <[\x[FB40]..\x[FB41]]>
| <[\x[FB43]..\x[FB44]]>
| <[\x[FB46]..\x[FBB1]]>
| <[\x[FBD3]..\x[FD3D]]>
| <[\x[FD50]..\x[FD8F]]>
| <[\x[FD92]..\x[FDC7]]>
| <[\x[FDF0]..\x[FDFB]]>
| <[\x[FE70]..\x[FE72]]>
| <[\x[FE76]..\x[FEFC]]>
| <[\x[FF21]..\x[FF3A]]>
| <[\x[FF41]..\x[FF5A]]>
| <[\x[FF66]..\x[FFBE]]>
| <[\x[FFC2]..\x[FFC7]]>
| <[\x[FFCA]..\x[FFCF]]>
| <[\x[FFD2]..\x[FFD7]]>
| <[\x[FFDA]..\x[FFDC]]>
| \x[038C]
| \x[0559]
| \x[06D5]
| \x[0710]
| \x[1310]
| \x[2115]
| \x[2124]
| \x[2126]
| \x[2128]
| \x[3400]
| \x[4DB5]
| \x[4E00]
| \x[9FA5]
| \x[AC00]
| \x[D7A3]
| \x[FB1D]
| \x[FB3E]
| \x[FE74]
}
}
use Test;
# Test driver for the grammar above. Each check parses a literal string
# starting from the rule named in the `rule` argument; every rule name used
# below is defined in Grammar::ECMAScript.
#   ok  - expects parse to return a true value (the whole string matched)
#   nok - expects parse to fail
# q{...} strings are not interpolated, so `$a` and `\u0000a` reach the parser
# as those literal characters; qq{...} strings do process the `\n` and
# `\x[...]` escapes.
# NOTE(review): per the report, many of these checks fail with the trimmed
# grammar (e.g. the Identifier checks using `$` or `_`, characters that
# UnicodeLetter does not cover). They are kept because removing a test
# reportedly hides the crash.
# NOTE(review): no plan is declared and the run is never finalised in the
# visible file - confirm whether that is intentional for the reproduction.
my $g = Grammar::ECMAScript.new;
##############################################################################
ok $g.parse( q{for(var a in b)a++;}, rule => 'forInStatement' );
ok $g.parse( q{var a}, rule => 'forInStatementInitialiserPart' );
ok $g.parse( q{(a)}, rule => 'formalParameterList' );
ok $g.parse( q{( a)}, rule => 'formalParameterList' );
ok $g.parse( q{( $a, \u0000a )}, rule => 'formalParameterList' );
ok $g.parse( q{var a}, rule => 'forStatementInitialiserPart' );
ok $g.parse( q{function a(){a++;}}, rule => 'functionDeclaration' );
ok $g.parse( q{function(){a++;}}, rule => 'functionExpression' );
ok $g.parse( q{a}, rule => 'Identifier' );
ok $g.parse( q{ab}, rule => 'Identifier' );
ok $g.parse( q{$a}, rule => 'Identifier' );
ok $g.parse( q{a_}, rule => 'Identifier' );
ok $g.parse( q{\u0000a}, rule => 'Identifier' );
ok $g.parse( q{[a]}, rule => 'indexSuffix' );
ok $g.parse( qq{[\na]}, rule => 'indexSuffix' );
ok $g.parse( qq{[a\n]}, rule => 'indexSuffix' );
ok $g.parse( qq{[\na\n]}, rule => 'indexSuffix' );
ok $g.parse( q{=a in b}, rule => 'initialiser' );
nok $g.parse( q{=a in b}, rule => 'initialiserNoIn' );
ok $g.parse( q{a:b;}, rule => 'labelledStatement' );
nok $g.parse( q{a||b in c}, rule => 'logicalORExpressionNoIn' );
ok $g.parse( q{a||b}, rule => 'logicalORExpressionNoIn' );
ok $g.parse( q{a*b}, rule => 'multiplicativeExpression' );
ok $g.parse( q{a++}, rule => 'postfixExpression' );
ok $g.parse( q{.a_}, rule => 'propertyReferenceSuffix' );
ok $g.parse( q{. a_}, rule => 'propertyReferenceSuffix' );
ok $g.parse( qq{.\na_}, rule => 'propertyReferenceSuffix' );
ok $g.parse( q{a<b}, rule => 'relationalExpression' );
ok $g.parse( q{a in b}, rule => 'relationalExpression' );
nok $g.parse( q{a in b}, rule => 'relationalExpressionNoIn' );
ok $g.parse( q{a<b}, rule => 'relationalExpressionNoIn' );
ok $g.parse( q{return a_;}, rule => 'returnStatement' );
ok $g.parse( qq{return a_\n}, rule => 'returnStatement' );
ok $g.parse( q{a<<b}, rule => 'shiftExpression' );
ok $g.parse( q{a++;}, rule => 'sourceElements' );
ok $g.parse( qq{a++;\nb<3;}, rule => 'sourceElements' );
ok $g.parse( q{a++;}, rule => 'statementList' );
ok $g.parse( q{throw a_;}, rule => 'throwStatement' );
ok $g.parse( qq{throw a_\n}, rule => 'throwStatement' );
ok $g.parse( q{a++;}, rule => 'TOP' );
ok $g.parse( q{void a}, rule => 'unaryExpression' );
# UnicodeLetter boundary checks: empty input, a digit and U+00AB must be
# rejected; an ASCII letter and U+04CB (start of a listed range) must match.
nok $g.parse( q{}, rule => 'UnicodeLetter' );
nok $g.parse( q{9}, rule => 'UnicodeLetter' );
nok $g.parse( qq{\x[00ab]}, rule => 'UnicodeLetter' );
ok $g.parse( q{a}, rule => 'UnicodeLetter' );
ok $g.parse( qq{\x[04cb]}, rule => 'UnicodeLetter' );
ok $g.parse( q{a=1}, rule => 'variableDeclarationList' );
ok $g.parse( q{a=1,b=a}, rule => 'variableDeclarationList' );
ok $g.parse( q{A}, rule => 'variableDeclarationNoIn' );
ok $g.parse( q{A=1}, rule => 'variableDeclarationNoIn' );
ok $g.parse( q{a=c in b}, rule => 'variableDeclarationNoIn' );
ok $g.parse( q{while(1)a++;}, rule => 'whileStatement' );
ok $g.parse( q{with(1)a++;}, rule => 'withStatement' );