http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c04fc99f/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java b/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java new file mode 100644 index 0000000..25ee006 --- /dev/null +++ b/src/main/java/org/apache/sysml/parser/pydml/PyDMLParserWrapper.java @@ -0,0 +1,283 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysml.parser.pydml; + +import java.io.ByteArrayInputStream; +import java.io.FileNotFoundException; +import java.io.IOException; +import java.io.InputStream; +import java.util.HashMap; +import java.util.Map; + +import org.antlr.v4.runtime.ANTLRInputStream; +import org.antlr.v4.runtime.BailErrorStrategy; +import org.antlr.v4.runtime.CommonTokenStream; +import org.antlr.v4.runtime.DefaultErrorStrategy; +import org.antlr.v4.runtime.atn.PredictionMode; +import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.ParseTree; +import org.antlr.v4.runtime.tree.ParseTreeWalker; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysml.api.DMLScript; +import org.apache.sysml.parser.AParserWrapper; +import org.apache.sysml.parser.DMLProgram; +import org.apache.sysml.parser.ForStatement; +import org.apache.sysml.parser.ForStatementBlock; +import org.apache.sysml.parser.FunctionStatementBlock; +import org.apache.sysml.parser.IfStatement; +import org.apache.sysml.parser.IfStatementBlock; +import org.apache.sysml.parser.ImportStatement; +import org.apache.sysml.parser.LanguageException; +import org.apache.sysml.parser.ParForStatement; +import org.apache.sysml.parser.ParForStatementBlock; +import org.apache.sysml.parser.ParseException; +import org.apache.sysml.parser.Statement; +import org.apache.sysml.parser.StatementBlock; +import org.apache.sysml.parser.WhileStatement; +import org.apache.sysml.parser.WhileStatementBlock; +import org.apache.sysml.parser.dml.DMLParserWrapper; +import org.apache.sysml.parser.pydml.PydmlParser.FunctionStatementContext; +import org.apache.sysml.parser.pydml.PydmlParser.PmlprogramContext; +import org.apache.sysml.parser.pydml.PydmlParser.StatementContext; +import org.apache.sysml.parser.pydml.PydmlSyntacticErrorListener.CustomDmlErrorListener; + +/** + * Logic of this wrapper is similar to DMLParserWrapper. 
+ * + * Note: ExpressionInfo and StatementInfo are simply wrapper objects and are reused in both DML and PyDML parsers. + * + */ +public class PyDMLParserWrapper extends AParserWrapper +{ + private static final Log LOG = LogFactory.getLog(DMLScript.class.getName()); + + /** + * Custom wrapper to convert statement into statement blocks. Called by doParse and in PydmlSyntacticValidator for for, parfor, while, ... + * @param current a statement + * @return corresponding statement block + */ + public static StatementBlock getStatementBlock(org.apache.sysml.parser.Statement current) { + StatementBlock blk = null; + if(current instanceof ParForStatement) { + blk = new ParForStatementBlock(); + blk.addStatement(current); + } + else if(current instanceof ForStatement) { + blk = new ForStatementBlock(); + blk.addStatement(current); + } + else if(current instanceof IfStatement) { + blk = new IfStatementBlock(); + blk.addStatement(current); + } + else if(current instanceof WhileStatement) { + blk = new WhileStatementBlock(); + blk.addStatement(current); + } + else { + // This includes ImportStatement + blk = new StatementBlock(); + blk.addStatement(current); + } + return blk; + } + + /** + * Parses the passed file with command line parameters. You can either pass both (local file) or just dmlScript (hdfs) or just file name (import command) + * @param fileName either full path or null --> only used for better error handling + * @param dmlScript required + * @param argVals + * @return + * @throws ParseException + */ + @Override + public DMLProgram parse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException { + DMLProgram prog = null; + + if(dmlScript == null || dmlScript.trim().isEmpty()) { + throw new ParseException("Incorrect usage of parse. 
Please pass dmlScript not just filename"); + } + + // Set the pipeline required for ANTLR parsing + PyDMLParserWrapper parser = new PyDMLParserWrapper(); + prog = parser.doParse(fileName, dmlScript, argVals); + + if(prog == null) { + throw new ParseException("One or more errors found during parsing. (could not construct AST for file: " + fileName + "). Cannot proceed ahead."); + } + return prog; + } + + /** + * This function is supposed to be called directly only from PydmlSyntacticValidator when it encounters 'import' + * @param fileName + * @return null if atleast one error + */ + public DMLProgram doParse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException { + DMLProgram dmlPgm = null; + + ANTLRInputStream in; + try { + if(dmlScript == null) { + dmlScript = DMLParserWrapper.readDMLScript(fileName); + } + + InputStream stream = new ByteArrayInputStream(dmlScript.getBytes()); + in = new org.antlr.v4.runtime.ANTLRInputStream(stream); + } + catch (FileNotFoundException e) { + throw new ParseException("ERROR: Cannot find file:" + fileName, e); + } + catch (IOException e) { + throw new ParseException("ERROR: Cannot open file:" + fileName, e); + } + catch (LanguageException e) { + throw new ParseException("ERROR: " + e.getMessage(), e); + } + + PmlprogramContext ast = null; + CustomDmlErrorListener errorListener = new CustomDmlErrorListener(); + + try { + PydmlLexer lexer = new PydmlLexer(in); + CommonTokenStream tokens = new CommonTokenStream(lexer); + PydmlParser antlr4Parser = new PydmlParser(tokens); + + boolean tryOptimizedParsing = false; // For now no optimization, since it is not able to parse integer value. 
+ + if(tryOptimizedParsing) { + // Try faster and simpler SLL + antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.SLL); + antlr4Parser.removeErrorListeners(); + antlr4Parser.setErrorHandler(new BailErrorStrategy()); + try{ + ast = antlr4Parser.pmlprogram(); + // If successful, no need to try out full LL(*) ... SLL was enough + } + catch(ParseCancellationException ex) { + // Error occurred, so now try full LL(*) for better error messages + tokens.reset(); + antlr4Parser.reset(); + if(fileName != null) { + errorListener.pushCurrentFileName(fileName); + } + else { + errorListener.pushCurrentFileName("MAIN_SCRIPT"); + } + // Set our custom error listener + antlr4Parser.addErrorListener(errorListener); + antlr4Parser.setErrorHandler(new DefaultErrorStrategy()); + antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.LL); + ast = antlr4Parser.pmlprogram(); + } + } + else { + // Set our custom error listener + antlr4Parser.removeErrorListeners(); + antlr4Parser.addErrorListener(errorListener); + errorListener.pushCurrentFileName(fileName); + + // Now do the parsing + ast = antlr4Parser.pmlprogram(); + } + } + catch(Exception e) { + throw new ParseException("ERROR: Cannot parse the program:" + fileName, e); + } + + + try { + // Now convert the parse tree into DMLProgram + // Do syntactic validation while converting + ParseTree tree = ast; + // And also do syntactic validation + ParseTreeWalker walker = new ParseTreeWalker(); + PydmlSyntacticValidatorHelper helper = new PydmlSyntacticValidatorHelper(errorListener); + PydmlSyntacticValidator validator = new PydmlSyntacticValidator(helper, fileName, argVals); + walker.walk(validator, tree); + errorListener.popFileName(); + if(errorListener.isAtleastOneError()) { + return null; + } + dmlPgm = createDMLProgram(ast); + } + catch(Exception e) { + throw new ParseException("ERROR: Cannot translate the parse tree into DMLProgram" + e.getMessage(), e); + } + + return dmlPgm; + } + + + private DMLProgram 
createDMLProgram(PmlprogramContext ast) { + + DMLProgram dmlPgm = new DMLProgram(); + + // First add all the functions + for(FunctionStatementContext fn : ast.functionBlocks) { + FunctionStatementBlock functionStmtBlk = new FunctionStatementBlock(); + functionStmtBlk.addStatement(fn.info.stmt); + try { + // TODO: currently the logic of nested namespace is not clear. + String namespace = DMLProgram.DEFAULT_NAMESPACE; + dmlPgm.addFunctionStatementBlock(namespace, fn.info.functionName, functionStmtBlk); + } catch (LanguageException e) { + LOG.error("line: " + fn.start.getLine() + ":" + fn.start.getCharPositionInLine() + " cannot process the function " + fn.info.functionName); + return null; + } + } + + // Then add all the statements + for(StatementContext stmtCtx : ast.blocks) { + Statement current = stmtCtx.info.stmt; + if(current == null) { + LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the statement"); + return null; + } + + // Ignore Newline logic + if(current.isEmptyNewLineStatement()) { + continue; + } + + if(current instanceof ImportStatement) { + // Handle import statements separately + if(stmtCtx.info.namespaces != null) { + // Add the DMLProgram entries into current program + for(Map.Entry<String, DMLProgram> entry : stmtCtx.info.namespaces.entrySet()) { + dmlPgm.getNamespaces().put(entry.getKey(), entry.getValue()); + } + } + else { + LOG.error("line: " + stmtCtx.start.getLine() + ":" + stmtCtx.start.getCharPositionInLine() + " cannot process the import statement"); + return null; + } + } + + // Now wrap statement into individual statement block + // merge statement will take care of merging these blocks + dmlPgm.addStatementBlock(getStatementBlock(current)); + } + + dmlPgm.mergeStatementBlocks(); + return dmlPgm; + } +}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c04fc99f/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4 b/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4
new file mode 100644
index 0000000..425859c
--- /dev/null
+++ b/src/main/java/org/apache/sysml/parser/pydml/Pydml.g4
@@ -0,0 +1,384 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
 */

grammar Pydml;

@header
{
    // package org.apache.sysml.parser.pydml;
    //import org.apache.sysml.parser.dml.StatementInfo;
    //import org.apache.sysml.parser.dml.ExpressionInfo;
}

// This antlr grammar is based on Python 3.3 language reference: https://docs.python.org/3.3/reference/grammar.html

tokens { INDENT, DEDENT }

@lexer::members {
    private boolean debugIndentRules = false;

    // Indentation level stack
    private java.util.Stack<Integer> indents = new java.util.Stack<Integer>();

    // Extra tokens queue (see the NEWLINE rule). Every emitted token passes through this
    // queue, so once an INDENT/DEDENT has been inserted the queue runs ahead of what
    // nextToken() has handed to the parser.
    private java.util.Queue<Token> tokens = new java.util.LinkedList<Token>();

    // Number of opened braces, brackets and parenthesis.
    private int opened = 0;

    // This is only used to set the line number for dedent
    private Token lastToken = null;


    // Records the token as the lexer's current token and appends it to the queue.
    @Override
    public void emit(Token t) {
        if(debugIndentRules)
            System.out.println("Emitted token:" + t);

        super.setToken(t);
        tokens.offer(t);
    }


    // Returns the next token for the parser. When the end of input is ahead while
    // indentation levels are still open, a NEWLINE, the outstanding DEDENTs and an EOF
    // are inserted first.
    @Override
    public Token nextToken() {
        if (_input.LA(1) == EOF && !this.indents.isEmpty()) {
            if(debugIndentRules)
                System.out.println("EOF reached and expecting some DEDENTS, so emitting them");

            // Remove only EOF tokens that are already buffered. The head of the queue can be
            // a real token that has not been returned yet (the queue lags behind as soon as
            // an INDENT was inserted), so polling the head unconditionally would drop it.
            java.util.Iterator<Token> pending = tokens.iterator();
            while (pending.hasNext()) {
                if (pending.next().getType() == EOF) {
                    pending.remove();
                }
            }

            // First emit an extra line break that serves as the end of the statement.
            this.emit(commonToken(PydmlParser.NEWLINE, "\n"));

            // Now emit as much DEDENT tokens as needed.
            while (!indents.isEmpty()) {
                if(debugIndentRules)
                    System.out.println("Emitting (inserted) DEDENTS");

                this.emit(createDedent());
                indents.pop();
            }
            // Put the EOF back on the token stream.
            this.emit(commonToken(PydmlParser.EOF, "<EOF>"));
        }
        Token next = super.nextToken();
        if (next.getChannel() == Token.DEFAULT_CHANNEL) {
            // Keep track of the last token on the default channel.
            this.lastToken = next;
        }
        Token retVal = tokens.isEmpty() ? next : tokens.poll();

        if(debugIndentRules)
            System.out.println("Returning nextToken: [" + retVal + "]<<" + tokens.isEmpty());

        return retVal;
    }

    // Creates an empty-text DEDENT token carrying the line of the last default-channel token.
    private Token createDedent() {
        CommonToken dedent = commonToken(PydmlParser.DEDENT, "");
        dedent.setLine(this.lastToken.getLine());
        return dedent;
    }

    // Creates a synthetic token with only type and text set (no source positions).
    private CommonToken commonToken(int type, String text) {
        // Note: main change. The position-based construction below emitted 3 additional
        // characters, so it is commented out for now.
        // int start = this.getCharIndex();
        // int stop = start + text.length();
        // return new CommonToken(this._tokenFactorySourcePair, type, DEFAULT_TOKEN_CHANNEL, start, stop);
        return new CommonToken(type, text); // Main change
    }

    // Calculates the indentation level from the spaces:
    // "Tabs are replaced (from left to right) by one to eight spaces
    // such that the total number of characters up to and including
    // the replacement is a multiple of eight [...]"
    // https://docs.python.org/3.1/reference/lexical_analysis.html#indentation
    static int getIndentationCount(String spaces) {
        int count = 0;
        for (char ch : spaces.toCharArray()) {
            switch (ch) {
                case '\t':
                    count += 8 - (count % 8);
                    break;
                default:
                    // A normal space char.
                    count++;
            }
        }
        return count;
    }
}


// 2. Modify this g4 by comparing it with Java:
// - https://pythonconquerstheuniverse.wordpress.com/2009/10/03/python-java-a-side-by-side-comparison/
// - http://www.cs.gordon.edu/courses/cps122/handouts-2014/From%20Python%20to%20Java%20Lecture/A%20Comparison%20of%20the%20Syntax%20of%20Python%20and%20Java.pdf
// - http://cs.joensuu.fi/~pviktor/python/slides/cheatsheet.pdf
// - http://www.interfaceware.com/manual/chameleon/scripts/quickreference.pdf

// DML Program is a list of expression
// For now, we only allow global function definitions (not nested or inside a while block)
pmlprogram: (blocks+=statement | functionBlocks+=functionStatement)* NEWLINE* EOF;



statement returns [ StatementInfo info ]
@init {
       // This action occurs regardless of how many alternatives in this rule
       $info = new StatementInfo();
} :
    // ------------------------------------------
    // ImportStatement
    'source' OPEN_PAREN filePath = STRING CLOSE_PAREN 'as' namespace=ID NEWLINE      # ImportStatement
    | 'setwd' OPEN_PAREN pathValue = STRING CLOSE_PAREN NEWLINE                      # PathStatement
    // ------------------------------------------
    // AssignmentStatement
    | targetList+=dataIdentifier '=' 'ifdef' OPEN_PAREN commandLineParam=dataIdentifier ',' source=expression CLOSE_PAREN NEWLINE   # IfdefAssignmentStatement
    // ------------------------------------------
    // Treat function call as AssignmentStatement or MultiAssignmentStatement
    // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A
    // Convert FunctionCallIdentifier(paramExprs, ..) -> source
    | // TODO: Throw an informative error if the user does not provide the optional assignment
    ( targetList+=dataIdentifier '=' )? name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN NEWLINE  # FunctionCallAssignmentStatement
    | OPEN_BRACK targetList+=dataIdentifier (',' targetList+=dataIdentifier)* CLOSE_BRACK '=' name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN NEWLINE  # FunctionCallMultiAssignmentStatement
    // {notifyErrorListeners("Too many parentheses");}
    // We don't support block statement
    // | '{' body+=expression ';'* ( body+=expression ';'* )* '}' # BlockStatement
    // ------------------------------------------
    | targetList+=dataIdentifier '=' source=expression NEWLINE   # AssignmentStatement
    // IfStatement
    // | 'if' OPEN_PAREN predicate=expression CLOSE_PAREN (ifBody+=statement ';'* | NEWLINE INDENT (ifBody+=statement)+ DEDENT ) ('else' (elseBody+=statement ';'* | '{' (elseBody+=statement ';'*)* '}'))? # IfStatement
    | 'if' (OPEN_PAREN predicate=expression CLOSE_PAREN | predicate=expression) ':' NEWLINE INDENT (ifBody+=statement)+ DEDENT ('else' ':' NEWLINE INDENT (elseBody+=statement)+ DEDENT )?  # IfStatement
    // ------------------------------------------
    // ForStatement & ParForStatement
    | 'for' (OPEN_PAREN iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* CLOSE_PAREN | iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ) ':' NEWLINE INDENT (body+=statement)+ DEDENT  # ForStatement
    // Convert strictParameterizedExpression to HashMap<String, String> for parForParams
    | 'parfor' (OPEN_PAREN iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* CLOSE_PAREN | iterVar=ID 'in' iterPred=iterablePredicate (',' parForParams+=strictParameterizedExpression)* ) ':' NEWLINE INDENT (body+=statement)+ DEDENT  # ParForStatement
    | 'while' ( OPEN_PAREN predicate=expression CLOSE_PAREN | predicate=expression ) ':' NEWLINE INDENT (body+=statement)+ DEDENT  # WhileStatement
    // ------------------------------------------
    | NEWLINE #IgnoreNewLine
;

iterablePredicate returns [ ExpressionInfo info ]
  @init {
         // This action occurs regardless of how many alternatives in this rule
         $info = new ExpressionInfo();
  } :
    from=expression ':' to=expression #IterablePredicateColonExpression
    | ID OPEN_PAREN from=expression ',' to=expression ',' increment=expression CLOSE_PAREN #IterablePredicateSeqExpression
    ;

functionStatement returns [ StatementInfo info ]
@init {
       // This action occurs regardless of how many alternatives in this rule
       $info = new StatementInfo();
} :
    // ------------------------------------------
    // FunctionStatement & ExternalFunctionStatement
    // small change: only allow typed arguments here ... instead of data identifier
    'def' name=ID OPEN_PAREN ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? CLOSE_PAREN ( '->' OPEN_PAREN ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? CLOSE_PAREN )? ':' NEWLINE INDENT (body+=statement)+ DEDENT # InternalFunctionDefExpression
    | 'defExternal' name=ID OPEN_PAREN ( inputParams+=typedArgNoAssign (',' inputParams+=typedArgNoAssign)* )? CLOSE_PAREN ( '->' OPEN_PAREN ( outputParams+=typedArgNoAssign (',' outputParams+=typedArgNoAssign)* )? CLOSE_PAREN )? 'implemented' 'in' OPEN_PAREN ( otherParams+=strictParameterizedKeyValueString (',' otherParams+=strictParameterizedKeyValueString)* )? CLOSE_PAREN NEWLINE # ExternalFunctionDefExpression
    // ------------------------------------------
;


// Other data identifiers are typedArgNoAssign, parameterizedExpression and strictParameterizedExpression
dataIdentifier returns [ ExpressionInfo dataInfo ]
@init {
       // This action occurs regardless of how many alternatives in this rule
       $dataInfo = new ExpressionInfo();
       // $dataInfo.expr = new org.apache.sysml.parser.DataIdentifier();
} :
    // ------------------------------------------
    // IndexedIdentifier
    name=ID OPEN_BRACK (rowLower=expression (':' rowUpper=expression)?)? ',' (colLower=expression (':' colUpper=expression)?)? CLOSE_BRACK # IndexedExpression
    // ------------------------------------------
    | ID                                            # SimpleDataIdentifierExpression
    | COMMANDLINE_NAMED_ID                          # CommandlineParamExpression
    | COMMANDLINE_POSITION_ID                       # CommandlinePositionExpression
;
expression returns [ ExpressionInfo info ]
@init {
       // This action occurs regardless of how many alternatives in this rule
       $info = new ExpressionInfo();
       // $info.expr = new org.apache.sysml.parser.BinaryExpression(org.apache.sysml.parser.Expression.BinaryOp.INVALID);
} :
    // ------------------------------------------
    // BinaryExpression
    // power
    <assoc=right> left=expression op='**' right=expression  # PowerExpression
    // unary plus and minus
    | op=('-'|'+') left=expression                        # UnaryExpression
    // sequence - since we are only using this into for loop => Array not supported
    //| left=expression op=':' right=expression             # SequenceExpression
    // matrix multiply
    // | left=expression op='*' right=expression           # MatrixMulExpression
    // modulus and integer division
    | left=expression op=('//' | '%' ) right=expression # ModIntDivExpression
    // arithmetic multiply and divide
    | left=expression op=('*'|'/') right=expression       # MultDivExpression
    // arithmetic addition and subtraction
    | left=expression op=('+'|'-') right=expression       # AddSubExpression
    // ------------------------------------------
    // RelationalExpression
    | left=expression op=('>'|'>='|'<'|'<='|'=='|'!=') right=expression # RelationalExpression
    // ------------------------------------------
    // BooleanExpression
    // boolean not
    | op='!' left=expression # BooleanNotExpression
    // boolean and
    | left=expression op=('&'|'and') right=expression # BooleanAndExpression
    // boolean or
    | left=expression op=('|'|'or') right=expression # BooleanOrExpression

    // ---------------------------------
    // only applicable for builtin function expressions
    // Add following additional functions and check number of parameters:
    // power, full, matrix, reshape, dot
    // Also take care of calls such as y.transpose(): the part before '.' is sometimes a
    // matrix object and sometimes a namespace (see the ID rule)
    | name=ID OPEN_PAREN (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? CLOSE_PAREN ';'*  # BuiltinFunctionExpression

    // 4. Atomic
    | OPEN_PAREN left=expression CLOSE_PAREN                       # AtomicExpression

    // Should you allow indexed expression here ?
    // | OPEN_BRACK targetList+=expression (',' targetList+=expression)* CLOSE_BRACK  # MultiIdExpression

    // | BOOLEAN                                       # ConstBooleanIdExpression
    | 'True'                                        # ConstTrueExpression
    | 'False'                                       # ConstFalseExpression
    | INT                                           # ConstIntIdExpression
    | DOUBLE                                        # ConstDoubleIdExpression
    | STRING                                        # ConstStringIdExpression
    | dataIdentifier                                # DataIdExpression
    // Special
    // | 'NULL' | 'NA' | 'Inf' | 'NaN'
;

typedArgNoAssign : paramName=ID ':' paramType=ml_type ;
parameterizedExpression : (paramName=ID '=')? paramVal=expression;
strictParameterizedExpression : paramName=ID '=' paramVal=expression ;
strictParameterizedKeyValueString : paramName=ID '=' paramVal=STRING ;
// sometimes this is matrix object and sometimes its namespace
ID : (ALPHABET (ALPHABET|DIGIT|'_')*  '.')? ALPHABET (ALPHABET|DIGIT|'_')*
    // Special ID cases:
   // | 'matrix' // --> This is a special case which causes lot of headache
   // | 'scalar' |  'float' | 'int' | 'bool' // corresponds to as.scalar, as.double, as.integer and as.logical
   | 'index.return'
;
// Unfortunately, we have datatype name clashing with builtin function name: matrix :(
// Therefore, ugly work around for checking datatype
ml_type :  valueType | dataType OPEN_BRACK valueType CLOSE_BRACK;
// Note to reduce number of keywords, these are case-sensitive,
// To allow case-insensitive, 'int' becomes: ('i' | 'I') ('n' | 'N') ('t' | 'T')
valueType:
    ID # ValueDataTypeCheck
   // 'int' | 'str' | 'bool' | 'float'
;
dataType:
        // 'scalar' # ScalarDataTypeDummyCheck
        // |
        ID # MatrixDataTypeCheck //{ if($ID.text.compareTo("matrix") != 0) { notifyErrorListeners("incorrect datatype"); } }
        //|  'matrix' //---> See ID, this causes lot of headache
        ;
INT : DIGIT+  [Ll]?;
// BOOLEAN : 'TRUE' | 'FALSE';
DOUBLE: DIGIT+ '.' DIGIT* EXP? [Ll]?
| DIGIT+ EXP? [Ll]?
| '.' DIGIT+ EXP? [Ll]?
;
DIGIT: '0'..'9';
ALPHABET : [a-zA-Z] ;
fragment EXP : ('E' | 'e') ('+' | '-')? INT ;
COMMANDLINE_NAMED_ID: '$' ALPHABET (ALPHABET|DIGIT|'_')*;
COMMANDLINE_POSITION_ID: '$' DIGIT+;

// supports single and double quoted string with escape characters
STRING: '"' ( ESC | ~[\\"] )*? '"' | '\'' ( ESC | ~[\\'] )*? '\'';
fragment ESC : '\\' [abtnfrv"'\\] ;
// Comments, whitespaces and new line
// LINE_COMMENT : '#' .*? '\r'? '\n' -> skip ;
// MULTILINE_BLOCK_COMMENT : '/*' .*? '*/' -> skip ;
// WHITESPACE : (' ' | '\r' | '\n')+ -> skip ;

OPEN_BRACK : '[' {opened++;};
CLOSE_BRACK : ']' {opened--;};
OPEN_PAREN : '(' {opened++;};
CLOSE_PAREN : ')' {opened--;};
// OPEN_BRACE : '{' {opened++;};
// CLOSE_BRACE : '}' {opened--;};

fragment SPACES : [ \t]+ ;
fragment COMMENT : '#' ~[\r\n]* ;
fragment LINE_JOINING : '\\' SPACES? ( '\r'? '\n' | '\r' ) ;

NEWLINE : ( '\r'? '\n' | '\r' ) SPACES?
{
    // Split the matched text into its line-break part and its leading-whitespace part.
    String newLine = getText().replaceAll("[^\r\n]+", "");
    String spaces = getText().replaceAll("[\r\n]+", "");
    int next = _input.LA(1);
    if (opened > 0 || next == '\r' || next == '\n' || next == '#') {
        // If we're inside a list or on a blank line, ignore all indents,
        // dedents and line breaks.
        skip();
        if(debugIndentRules) {
            if(next == '\r' || next == '\n') {
                    System.out.println("4.1 Skipping (blank lines)");
            }
            else if(next == '#') {
                System.out.println("4.2 Skipping (comment)");
            }
            else {
                System.out.println("4.2 Skipping something else");
            }
        }
    }
    else {
        emit(commonToken(NEWLINE, newLine));

        int indent = getIndentationCount(spaces);
        int previous = indents.isEmpty() ? 0 : indents.peek();
        if (indent == previous) {
            if(debugIndentRules)
                System.out.println("3. Skipping identation as of same size:" + next);

            // skip indents of the same size as the present indent-size
            // (the NEWLINE emitted above stays queued; only this rule's own token is skipped)
            skip();
        }
        else if (indent > previous) {
            if(debugIndentRules)
                System.out.println("1. Indent:" + next);

            indents.push(indent);
            emit(commonToken(PydmlParser.INDENT, spaces));
        }
        else {
            // Possibly emit more than 1 DEDENT token.
            while(!indents.isEmpty() && indents.peek() > indent) {
                if(debugIndentRules)
                    System.out.println("2. Dedent:" + next);

                this.emit(createDedent());
                indents.pop();
            }
        }
    }
}
;

SKIP : ( SPACES | COMMENT | LINE_JOINING ) -> skip ;

http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/c04fc99f/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java
----------------------------------------------------------------------
diff --git a/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java b/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java
new file mode 100644
index 0000000..8b2cc34
--- /dev/null
+++ b/src/main/java/org/apache/sysml/parser/pydml/PydmlSyntacticErrorListener.java
@@ -0,0 +1,114 @@
/*
 * Licensed to the Apache Software Foundation (ASF) under one
 * or more contributor license agreements.  See the NOTICE file
 * distributed with this work for additional information
 * regarding copyright ownership.  The ASF licenses this file
 * to you under the Apache License, Version 2.0 (the
 * "License"); you may not use this file except in compliance
 * with the License.  You may obtain a copy of the License at
 *
 *   http://www.apache.org/licenses/LICENSE-2.0
 *
 * Unless required by applicable law or agreed to in writing,
 * software distributed under the License is distributed on an
 * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
 * KIND, either express or implied.  See the License for the
 * specific language governing permissions and limitations
 * under the License.
+ */ + +package org.apache.sysml.parser.pydml; + +import java.util.Stack; + +import org.antlr.v4.runtime.BaseErrorListener; +import org.antlr.v4.runtime.RecognitionException; +import org.antlr.v4.runtime.Recognizer; +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; + +import org.apache.sysml.api.DMLScript; + +public class PydmlSyntacticErrorListener +{ + private static final Log LOG = LogFactory.getLog(DMLScript.class.getName()); + + public static class CustomDmlErrorListener extends BaseErrorListener { + + private boolean atleastOneError = false; + private Stack<String> currentFileName = new Stack<String>(); + + public void pushCurrentFileName(String currentFilePath) { + currentFileName.push(currentFilePath); + } + + public String peekFileName() { + return currentFileName.peek(); + } + + public String popFileName() { + return currentFileName.pop(); + } + + public void validationError(int line, int charPositionInLine, String msg) { + try { + setAtleastOneError(true); + // Print error messages with file name + if(currentFileName == null || currentFileName.empty()) { + LOG.error("line "+line+":"+charPositionInLine+" "+msg); + } + else { + String fileName = currentFileName.peek(); + LOG.error(fileName + " line "+line+":"+charPositionInLine+" "+msg); + } + } + catch(Exception e1) { + LOG.error("ERROR: while customizing error message:" + e1); + } + } + + public void validationWarning(int line, int charPositionInLine, String msg) { + try { + //atleastOneError = true; ---> not an error, just warning + // Print error messages with file name + if(currentFileName == null || currentFileName.empty()) + LOG.warn("line "+line+":"+charPositionInLine+" "+msg); + else { + String fileName = currentFileName.peek(); + LOG.warn(fileName + " line "+line+":"+charPositionInLine+" "+msg); + } + } + catch(Exception e1) { + LOG.warn("ERROR: while customizing error message:" + e1); + } + } + + @Override + public void syntaxError(Recognizer<?, ?> 
recognizer, Object offendingSymbol, + int line, int charPositionInLine, + String msg, RecognitionException e) + { + try { + setAtleastOneError(true); + // Print error messages with file name + if(currentFileName == null || currentFileName.empty()) + LOG.error("line "+line+":"+charPositionInLine+" "+msg); + else { + String fileName = currentFileName.peek(); + LOG.error(fileName + " line "+line+":"+charPositionInLine+" "+msg); + } + } + catch(Exception e1) { + LOG.error("ERROR: while customizing error message:" + e1); + } + } + + public boolean isAtleastOneError() { + return atleastOneError; + } + + public void setAtleastOneError(boolean atleastOneError) { + this.atleastOneError = atleastOneError; + } + } +}
