[SYSTEMML-456] Parser code deduplication Refactor code common to DML & PyDML parser packages. Fix: comment code, package qualification, package-info and error msgs. Simplify getConstIdFromString. Style fixes, refactor out constant binary expression. Change occurrences of B.compareTo(A) == 0 to B.equals(A). Organize imports. Condensed unnecessary branches.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6017f069 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6017f069 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6017f069 Branch: refs/heads/master Commit: 6017f069a3fc92cd10ea2112f3d8997b4af066b0 Parents: f21eab7 Author: Nakul Jindal <[email protected]> Authored: Tue Feb 2 11:26:37 2016 -0800 Committer: Deron Eriksson <[email protected]> Committed: Fri Feb 19 11:57:04 2016 -0800 ---------------------------------------------------------------------- .../org/apache/sysml/parser/AParserWrapper.java | 111 +- .../sysml/parser/FunctionCallIdentifier.java | 4 +- .../parser/common/CommonSyntacticValidator.java | 713 +++++++ .../sysml/parser/common/ExpressionInfo.java | 40 + .../sysml/parser/common/StatementInfo.java | 47 + .../parser/common/SyntacticErrorListener.java | 111 ++ .../sysml/parser/dml/DMLParserWrapper.java | 167 +- .../java/org/apache/sysml/parser/dml/Dml.g4 | 14 +- .../parser/dml/DmlSyntacticErrorListener.java | 115 -- .../sysml/parser/dml/DmlSyntacticValidator.java | 1484 +++++---------- .../parser/dml/DmlSyntacticValidatorHelper.java | 108 -- .../apache/sysml/parser/dml/ExpressionInfo.java | 31 - .../apache/sysml/parser/dml/StatementInfo.java | 36 - .../sysml/parser/pydml/ExpressionInfo.java | 32 - .../sysml/parser/pydml/PyDMLParserWrapper.java | 77 +- .../java/org/apache/sysml/parser/pydml/Pydml.g4 | 14 +- .../pydml/PydmlSyntacticErrorListener.java | 114 -- .../parser/pydml/PydmlSyntacticValidator.java | 1789 +++++++----------- .../pydml/PydmlSyntacticValidatorHelper.java | 101 - .../sysml/parser/pydml/StatementInfo.java | 32 - 20 files changed, 2179 insertions(+), 2961 deletions(-) ---------------------------------------------------------------------- 
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6017f069/src/main/java/org/apache/sysml/parser/AParserWrapper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/AParserWrapper.java b/src/main/java/org/apache/sysml/parser/AParserWrapper.java index 6e67954..8db932a 100644 --- a/src/main/java/org/apache/sysml/parser/AParserWrapper.java +++ b/src/main/java/org/apache/sysml/parser/AParserWrapper.java @@ -19,10 +19,19 @@ package org.apache.sysml.parser; +import java.io.BufferedReader; +import java.io.FileReader; +import java.io.IOException; +import java.io.InputStreamReader; import java.util.HashMap; +import org.apache.commons.logging.Log; +import org.apache.hadoop.fs.FileSystem; +import org.apache.hadoop.fs.Path; +import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.parser.dml.DMLParserWrapper; import org.apache.sysml.parser.pydml.PyDMLParserWrapper; +import org.apache.sysml.runtime.util.LocalFileUtils; /** * Base class for all dml parsers in order to make the various compilation chains @@ -34,25 +43,18 @@ public abstract class AParserWrapper //1) skip errors on unspecified args (modified by mlcontext / jmlc) public static boolean IGNORE_UNSPECIFIED_ARGS = false; - /** - * - * @param fileName - * @param dmlScript - * @param argVals - * @return - * @throws ParseException - */ - public abstract DMLProgram parse(String fileName, String dmlScript, HashMap<String,String> argVals) - throws ParseException; + public abstract DMLProgram parse(String fileName, String dmlScript, HashMap<String, String> argVals) throws ParseException; + /** * Factory method for creating instances of AParserWrapper, for * simplificy fused with the abstract class. 
* - * @param pydml + * @param pydml true if a PyDML parser is needed * @return */ + @SuppressWarnings("rawtypes") public static AParserWrapper createParser(boolean pydml) { AParserWrapper ret = null; @@ -65,4 +67,91 @@ public abstract class AParserWrapper return ret; } + + /** + * Custom wrapper to convert statement into statement blocks. Called by doParse and in DmlSyntacticValidator for for, parfor, while, ... + * @param current a statement + * @return corresponding statement block + */ + public static StatementBlock getStatementBlock(Statement current) { + StatementBlock blk = null; + if(current instanceof ParForStatement) { + blk = new ParForStatementBlock(); + blk.addStatement(current); + } + else if(current instanceof ForStatement) { + blk = new ForStatementBlock(); + blk.addStatement(current); + } + else if(current instanceof IfStatement) { + blk = new IfStatementBlock(); + blk.addStatement(current); + } + else if(current instanceof WhileStatement) { + blk = new WhileStatementBlock(); + blk.addStatement(current); + } + else { + // This includes ImportStatement + blk = new StatementBlock(); + blk.addStatement(current); + } + return blk; + } + + + public static String readDMLScript( String script, Log LOG) + throws IOException, LanguageException + { + String dmlScriptStr = null; + + //read DML script from file + if(script == null) + throw new LanguageException("DML script path was not specified!"); + + StringBuilder sb = new StringBuilder(); + BufferedReader in = null; + try + { + //read from hdfs or gpfs file system + if( script.startsWith("hdfs:") + || script.startsWith("gpfs:") ) + { + if( !LocalFileUtils.validateExternalFilename(script, true) ) + throw new LanguageException("Invalid (non-trustworthy) hdfs filename."); + FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); + Path scriptPath = new Path(script); + in = new BufferedReader(new InputStreamReader(fs.open(scriptPath))); + } + // from local file system + else + { + if( 
!LocalFileUtils.validateExternalFilename(script, false) ) + throw new LanguageException("Invalid (non-trustworthy) local filename."); + in = new BufferedReader(new FileReader(script)); + } + + //core script reading + String tmp = null; + while ((tmp = in.readLine()) != null) + { + sb.append( tmp ); + sb.append( "\n" ); + } + } + catch (IOException ex) + { + LOG.error("Failed to read the script from the file system", ex); + throw ex; + } + finally + { + if( in != null ) + in.close(); + } + + dmlScriptStr = sb.toString(); + + return dmlScriptStr; + } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6017f069/src/main/java/org/apache/sysml/parser/FunctionCallIdentifier.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/FunctionCallIdentifier.java b/src/main/java/org/apache/sysml/parser/FunctionCallIdentifier.java index a790158..05911dc 100644 --- a/src/main/java/org/apache/sysml/parser/FunctionCallIdentifier.java +++ b/src/main/java/org/apache/sysml/parser/FunctionCallIdentifier.java @@ -36,11 +36,11 @@ public class FunctionCallIdentifier extends DataIdentifier * @param functionName the (optional) namespace information and name of function. 
If both namespace and name are specified, they are concatinated with "::" * @throws ParseException */ - public void setFunctionName(String functionName) throws ParseException{ + public void setFunctionName(String functionName) { _name = functionName; } - public void setFunctionNamespace(String passed) throws ParseException{ + public void setFunctionNamespace(String passed) { _namespace = passed; } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6017f069/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java b/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java new file mode 100644 index 0000000..27beb0d --- /dev/null +++ b/src/main/java/org/apache/sysml/parser/common/CommonSyntacticValidator.java @@ -0,0 +1,713 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysml.parser.common; + +import java.util.ArrayList; +import java.util.HashMap; +import java.util.Map; +import java.util.Set; +import java.util.regex.Pattern; + +import org.antlr.v4.runtime.ParserRuleContext; +import org.antlr.v4.runtime.Token; +import org.apache.sysml.parser.AssignmentStatement; +import org.apache.sysml.parser.BinaryExpression; +import org.apache.sysml.parser.BooleanExpression; +import org.apache.sysml.parser.BooleanIdentifier; +import org.apache.sysml.parser.BuiltinFunctionExpression; +import org.apache.sysml.parser.ConstIdentifier; +import org.apache.sysml.parser.DMLProgram; +import org.apache.sysml.parser.DataExpression; +import org.apache.sysml.parser.DataIdentifier; +import org.apache.sysml.parser.DoubleIdentifier; +import org.apache.sysml.parser.Expression; +import org.apache.sysml.parser.Expression.DataOp; +import org.apache.sysml.parser.FunctionCallIdentifier; +import org.apache.sysml.parser.IndexedIdentifier; +import org.apache.sysml.parser.IntIdentifier; +import org.apache.sysml.parser.LanguageException; +import org.apache.sysml.parser.MultiAssignmentStatement; +import org.apache.sysml.parser.OutputStatement; +import org.apache.sysml.parser.ParameterExpression; +import org.apache.sysml.parser.ParameterizedBuiltinFunctionExpression; +import org.apache.sysml.parser.PrintStatement; +import org.apache.sysml.parser.RelationalExpression; +import org.apache.sysml.parser.Statement; +import org.apache.sysml.parser.StringIdentifier; +import org.apache.sysml.parser.common.SyntacticErrorListener.CustomErrorListener; +import org.apache.sysml.parser.dml.DmlParser.BuiltinFunctionExpressionContext; +import org.apache.sysml.parser.dml.DmlSyntacticValidator; +import org.apache.sysml.parser.pydml.PydmlSyntacticValidator; + +import com.google.common.primitives.Doubles; +import com.google.common.primitives.Longs; + +/** + * Contains fields and (helper) methods common to {@link DmlSyntacticValidator} and {@link 
PydmlSyntacticValidator} + */ +public abstract class CommonSyntacticValidator { + + protected final CustomErrorListener errorListener; + protected final String currentFile; + protected String _workingDir = "."; //current working directory + protected HashMap<String,String> argVals = null; + + public CommonSyntacticValidator(CustomErrorListener errorListener, HashMap<String,String> argVals) { + this.errorListener = errorListener; + currentFile = errorListener.getCurrentFileName(); + this.argVals = argVals; + } + + protected void notifyErrorListeners(String message, int line, int charPositionInLine) { + errorListener.validationError(line, charPositionInLine, message); + } + + protected void notifyErrorListeners(String message, Token op) { + errorListener.validationError(op.getLine(), op.getCharPositionInLine(), message); + } + + protected void raiseWarning(String message, Token op) { + errorListener.validationWarning(op.getLine(), op.getCharPositionInLine(), message); + } + + // Different namespaces for DML (::) and PyDml (.) 
+ public abstract String namespaceResolutionOp(); + + // Returns list of two elements <namespace, function names>, else null + protected String[] getQualifiedNames(String fullyQualifiedFunctionName) { + String splitStr = Pattern.quote(namespaceResolutionOp()); + String [] fnNames = fullyQualifiedFunctionName.split(splitStr); + String functionName = ""; + String namespace = ""; + if(fnNames.length == 1) { + namespace = DMLProgram.DEFAULT_NAMESPACE; + functionName = fnNames[0].trim(); + } + else if(fnNames.length == 2) { + namespace = fnNames[0].trim(); + functionName = fnNames[1].trim(); + } + else + return null; + + String[] retVal = new String[2]; + retVal[0] = namespace; + retVal[1] = functionName; + return retVal; + } + + protected boolean validateBuiltinFunctions(String function) { + String functionName = function.replaceAll(" ", "").trim(); + if(functionName.equals("write") || functionName.equals(DMLProgram.DEFAULT_NAMESPACE + namespaceResolutionOp() + "write")) { + return validateBuiltinWriteFunction(function); + } + return true; + } + + protected boolean validateBuiltinWriteFunction(String function) { + return true; + } + + + protected void setFileLineColumn(Expression expr, ParserRuleContext ctx) { + String txt = ctx.getText(); + expr.setFilename(currentFile); + expr.setBeginLine(ctx.start.getLine()); + expr.setBeginColumn(ctx.start.getCharPositionInLine()); + expr.setEndLine(ctx.stop.getLine()); + expr.setEndColumn(ctx.stop.getCharPositionInLine()); + if(expr.getBeginColumn() == expr.getEndColumn() && expr.getBeginLine() == expr.getEndLine() && txt.length() > 1) { + expr.setEndColumn(expr.getBeginColumn() + txt.length() - 1); + } + } + + protected void setFileLineColumn(Statement stmt, ParserRuleContext ctx) { + String txt = ctx.getText(); + stmt.setFilename(currentFile); + stmt.setBeginLine(ctx.start.getLine()); + stmt.setBeginColumn(ctx.start.getCharPositionInLine()); + stmt.setEndLine(ctx.stop.getLine()); + 
stmt.setEndColumn(ctx.stop.getCharPositionInLine()); + if(stmt.getBeginColumn() == stmt.getEndColumn() && stmt.getBeginLine() == stmt.getEndLine() && txt.length() > 1) { + stmt.setEndColumn(stmt.getBeginColumn() + txt.length() - 1); + } + } + + // For String literal "True/TRUE" + public abstract String trueStringLiteral(); + + // For String literal "False/FALSE" + public abstract String falseStringLiteral(); + + // -------------------------------------------------------------------- + // HELPER METHODS FOR OVERRIDDEN VISITOR FUNCTIONS + // -------------------------------------------------------------------- + + protected void binaryExpressionHelper(ParserRuleContext ctx, ExpressionInfo left, ExpressionInfo right, + ExpressionInfo me, String op) { + if(left.expr != null && right.expr != null) { + Expression.BinaryOp bop = Expression.getBinaryOp(op); + BinaryExpression be = new BinaryExpression(bop); + be = new BinaryExpression(bop); + be.setLeft(left.expr); + be.setRight(right.expr); + me.expr = be; + setFileLineColumn(me.expr, ctx); + } + } + + protected void relationalExpressionHelper(ParserRuleContext ctx, ExpressionInfo left, ExpressionInfo right, + ExpressionInfo me, String op) { + if(left.expr != null && right.expr != null) { + Expression.RelationalOp rop = Expression.getRelationalOp(op); + RelationalExpression re = new RelationalExpression(rop); + re.setLeft(left.expr); + re.setRight(right.expr); + me.expr = re; + setFileLineColumn(me.expr, ctx); + } + } + + protected void booleanExpressionHelper(ParserRuleContext ctx, ExpressionInfo left, ExpressionInfo right, + ExpressionInfo me, String op) { + if(left.expr != null && right.expr != null) { + Expression.BooleanOp bop = Expression.getBooleanOp(op); + BooleanExpression re = new BooleanExpression(bop); + re.setLeft(left.expr); + re.setRight(right.expr); + me.expr = re; + setFileLineColumn(me.expr, ctx); + } + } + + + + protected void unaryExpressionHelper(ParserRuleContext ctx, ExpressionInfo left, 
ExpressionInfo me, String op) { + if(left.expr != null) { + Token start = ctx.start; + String fileName = currentFile; + int line = start.getLine(); + int col = start.getCharPositionInLine(); + + if(left.expr instanceof IntIdentifier) { + if(op.equals("-")) { + ((IntIdentifier) left.expr).multiplyByMinusOne(); + } + me.expr = left.expr; + } + else if(left.expr instanceof DoubleIdentifier) { + if(op.equals("-")) { + ((DoubleIdentifier) left.expr).multiplyByMinusOne(); + } + me.expr = left.expr; + } + else { + Expression right = new IntIdentifier(1, fileName, line, col, line, col); + if(op.equals("-")) { + right = new IntIdentifier(-1, fileName, line, col, line, col); + } + + Expression.BinaryOp bop = Expression.getBinaryOp("*"); + BinaryExpression be = new BinaryExpression(bop); + be.setLeft(left.expr); + be.setRight(right); + me.expr = be; + } + setFileLineColumn(me.expr, ctx); + } + } + + protected void unaryBooleanExpressionHelper(ParserRuleContext ctx, ExpressionInfo left, ExpressionInfo me, + String op) { + if(left.expr != null) { + Expression.BooleanOp bop = Expression.getBooleanOp(op); + BooleanExpression be = new BooleanExpression(bop); + be.setLeft(left.expr); + me.expr = be; + setFileLineColumn(me.expr, ctx); + } + } + + + protected void constDoubleIdExpressionHelper(ParserRuleContext ctx, ExpressionInfo me) { + try { + Token start = ctx.start; + double val = Double.parseDouble(ctx.getText()); + int linePosition = start.getLine(); + int charPosition = start.getCharPositionInLine(); + me.expr = new DoubleIdentifier(val, currentFile, linePosition, charPosition, linePosition, charPosition); + setFileLineColumn(me.expr, ctx); + } + catch(Exception e) { + notifyErrorListeners("cannot parse the float value: \'" + ctx.getText() + "\'", ctx.getStart()); + return; + } + } + + protected void constIntIdExpressionHelper(ParserRuleContext ctx, ExpressionInfo me) { + try { + Token start = ctx.start; + long val = Long.parseLong(ctx.getText()); + int linePosition = 
start.getLine(); + int charPosition = start.getCharPositionInLine(); + me.expr = new IntIdentifier(val, currentFile, linePosition, charPosition, linePosition, charPosition); + setFileLineColumn(me.expr, ctx); + } + catch(Exception e) { + notifyErrorListeners("cannot parse the int value: \'" + ctx.getText() + "\'", ctx.getStart()); + return; + } + } + + protected void constStringIdExpressionHelper(ParserRuleContext ctx, ExpressionInfo me) { + String val = ""; + String text = ctx.getText(); + if( (text.startsWith("\"") && text.endsWith("\"")) || + (text.startsWith("\'") && text.endsWith("\'"))) { + if(text.length() > 2) { + val = text.substring(1, text.length()-1); + } + } + else { + notifyErrorListeners("incorrect string literal ", ctx.start); + return; + } + + int linePosition = ctx.start.getLine(); + int charPosition = ctx.start.getCharPositionInLine(); + me.expr = new StringIdentifier(val, currentFile, linePosition, charPosition, linePosition, charPosition); + setFileLineColumn(me.expr, ctx); + } + + protected void booleanIdentifierHelper(ParserRuleContext ctx, boolean val, ExpressionInfo info) { + int linePosition = ctx.start.getLine(); + int charPosition = ctx.start.getCharPositionInLine(); + info.expr = new BooleanIdentifier(val, currentFile, linePosition, charPosition, linePosition, charPosition); + setFileLineColumn(info.expr, ctx); + } + + protected void exitDataIdExpressionHelper(ParserRuleContext ctx, ExpressionInfo me, ExpressionInfo dataInfo) { + me.expr = dataInfo.expr; + int line = ctx.start.getLine(); + int col = ctx.start.getCharPositionInLine(); + me.expr.setAllPositions(currentFile, line, col, line, col); + setFileLineColumn(me.expr, ctx); + } + + protected void exitIndexedExpressionHelper(ParserRuleContext ctx, String name, ExpressionInfo dataInfo, + ExpressionInfo rowLower, ExpressionInfo rowUpper, ExpressionInfo colLower, ExpressionInfo colUpper) { + dataInfo.expr = new IndexedIdentifier(name, false, false); + setFileLineColumn(dataInfo.expr, 
ctx); + boolean isRowLower = rowLower != null; + boolean isRowUpper = rowUpper != null; + boolean isColLower = colLower != null; + boolean isColUpper = colUpper != null; + try { + ArrayList< ArrayList<Expression> > exprList = new ArrayList< ArrayList<Expression> >(); + + ArrayList<Expression> rowIndices = new ArrayList<Expression>(); + ArrayList<Expression> colIndices = new ArrayList<Expression>(); + + + if(!isRowLower && !isRowUpper) { + // both not set + rowIndices.add(null); rowIndices.add(null); + } + else if(isRowLower && isRowUpper) { + // both set + rowIndices.add(incrementByOne(rowLower.expr, ctx)); + rowIndices.add(rowUpper.expr); + } + else if(isRowLower && !isRowUpper) { + // only row set + rowIndices.add(incrementByOne(rowLower.expr, ctx)); + } + else { + notifyErrorListeners("incorrect index expression for row", ctx.start); + return; + } + + if(!isColLower && !isColUpper) { + // both not set + colIndices.add(null); colIndices.add(null); + } + else if(isColLower && isColUpper) { + colIndices.add(incrementByOne(colLower.expr, ctx)); + colIndices.add(colUpper.expr); + } + else if(isColLower && !isColUpper) { + colIndices.add(incrementByOne(colLower.expr, ctx)); + } + else { + notifyErrorListeners("incorrect index expression for column", ctx.start); + return; + } + exprList.add(rowIndices); + exprList.add(colIndices); + ((IndexedIdentifier) dataInfo.expr).setIndices(exprList); + } + catch(Exception e) { + notifyErrorListeners("cannot set the indices", ctx.start); + return; + } + } + + private Expression incrementByOne(Expression expr, ParserRuleContext ctx) { + // For maintaining semantic consistency, we have decided to keep 1-based indexing + // If in future, PyDML becomes more popular than DML, this can be switched. 
+ return expr; + } + + protected ConstIdentifier getConstIdFromString(String varValue, Token start) { + + int linePosition = start.getLine(); + int charPosition = start.getCharPositionInLine(); + + // Compare to "True/TRUE" + if(varValue.equals(trueStringLiteral())) + return new BooleanIdentifier(true, currentFile, linePosition, charPosition, linePosition, charPosition); + + // Compare to "False/FALSE" + if(varValue.equals(falseStringLiteral())) + return new BooleanIdentifier(false, currentFile, linePosition, charPosition, linePosition, charPosition); + + // Check for long literal + Long l = Longs.tryParse(varValue); + if (l != null) + return new IntIdentifier(l.longValue(), currentFile, linePosition, charPosition, linePosition, charPosition); + + // Check for double literal + Double d = Doubles.tryParse(varValue); + if (d != null) + return new DoubleIdentifier(d.doubleValue(), currentFile, linePosition, charPosition, linePosition, charPosition); + + // Otherwise it is a string literal (optionally enclosed within single or double quotes) + String val = ""; + String text = varValue; + if( (text.startsWith("\"") && text.endsWith("\"")) || (text.startsWith("\'") && text.endsWith("\'"))) { + if(text.length() > 2) { + val = text.substring(1, text.length()-1); + } + } + else { + // the commandline parameters can be passed without any quotes + val = varValue; + } + return new StringIdentifier(val, currentFile, linePosition, charPosition, linePosition, charPosition); + } + + + protected void fillExpressionInfoCommandLineParameters(String varName, ExpressionInfo dataInfo, Token start) { + + if(!varName.startsWith("$")) { + notifyErrorListeners("commandline param doesnot start with $", start); + return; + } + + String varValue = null; + for(Map.Entry<String, String> arg : this.argVals.entrySet()) { + if(arg.getKey().trim().equals(varName)) { + if(varValue != null) { + notifyErrorListeners("multiple values passed for the parameter " + varName + " via commandline", start); + 
return; + } + else { + varValue = arg.getValue().trim(); + } + } + } + + if(varValue == null) { + return; + } + + // Command line param cannot be empty string + // If you want to pass space, please quote it + if(varValue.trim().equals("")) + return; + + dataInfo.expr = getConstIdFromString(varValue, start); + } + + protected void exitAssignmentStatementHelper(ParserRuleContext ctx, String lhs, ExpressionInfo dataInfo, + Token lhsStart, ExpressionInfo rhs, StatementInfo info) { + if(lhs.startsWith("$")) { + notifyErrorListeners("assignment of commandline parameters is not allowed. (Quickfix: try using someLocalVariable=ifdef(" + lhs + ", default value))", ctx.start); + return; + } + + DataIdentifier target = null; + if(dataInfo.expr instanceof DataIdentifier) { + target = (DataIdentifier) dataInfo.expr; + Expression source = rhs.expr; + + int line = ctx.start.getLine(); + int col = ctx.start.getCharPositionInLine(); + try { + info.stmt = new AssignmentStatement(target, source, line, col, line, col); + setFileLineColumn(info.stmt, ctx); + } catch (LanguageException e) { + // TODO: extract more meaningful info from this exception. 
+ notifyErrorListeners("invalid assignment", lhsStart); + return; + } + } + else { + notifyErrorListeners("incorrect lvalue in assignment statement", lhsStart); + return; + } + } + + + // ----------------------------------------------------------------- + // Helper Functions for exit*FunctionCall*AssignmentStatement + // ----------------------------------------------------------------- + + protected void setPrintStatement(ParserRuleContext ctx, String functionName, + ArrayList<ParameterExpression> paramExpression, StatementInfo thisinfo) { + if(paramExpression.size() != 1) { + notifyErrorListeners(functionName + "() has only one parameter", ctx.start); + return; + } + Expression expr = paramExpression.get(0).getExpr(); + if(expr == null) { + notifyErrorListeners("cannot process " + functionName + "() function", ctx.start); + return; + } + try { + int line = ctx.start.getLine(); + int col = ctx.start.getCharPositionInLine(); + thisinfo.stmt = new PrintStatement(functionName, expr, line, col, line, col); + setFileLineColumn(thisinfo.stmt, ctx); + } catch (LanguageException e) { + notifyErrorListeners("cannot process " + functionName + "() function", ctx.start); + return; + } + } + + protected void setOutputStatement(ParserRuleContext ctx, + ArrayList<ParameterExpression> paramExpression, StatementInfo info) { + if(paramExpression.size() < 2){ + notifyErrorListeners("incorrect usage of write function (atleast 2 arguments required)", ctx.start); + return; + } + if(paramExpression.get(0).getExpr() instanceof DataIdentifier) { + String fileName = currentFile; + int line = ctx.start.getLine(); + int col = ctx.start.getCharPositionInLine(); + HashMap<String, Expression> varParams = new HashMap<String, Expression>(); + varParams.put(DataExpression.IO_FILENAME, paramExpression.get(1).getExpr()); + for(int i = 2; i < paramExpression.size(); i++) { + // DataExpression.FORMAT_TYPE, DataExpression.DELIM_DELIMITER, DataExpression.DELIM_HAS_HEADER_ROW, DataExpression.DELIM_SPARSE 
+ varParams.put(paramExpression.get(i).getName(), paramExpression.get(i).getExpr()); + } + + DataExpression dataExpression = new DataExpression(DataOp.WRITE, varParams, fileName, line, col, line, col); + info.stmt = new OutputStatement((DataIdentifier) paramExpression.get(0).getExpr(), DataOp.WRITE, fileName, line, col, line, col); + setFileLineColumn(info.stmt, ctx); + ((OutputStatement)info.stmt).setExprParams(dataExpression); + } + else { + notifyErrorListeners("incorrect usage of write function", ctx.start); + } + } + + protected void setAssignmentStatement(ParserRuleContext ctx, StatementInfo info, DataIdentifier target, Expression expression) { + try { + info.stmt = new AssignmentStatement(target, expression, ctx.start.getLine(), ctx.start.getCharPositionInLine(), ctx.start.getLine(), ctx.start.getCharPositionInLine()); + setFileLineColumn(info.stmt, ctx); + } catch (LanguageException e) { + // TODO: extract more meaningful info from this exception. + notifyErrorListeners("invalid function call", ctx.start); + return; + } + } + + /** + * Information about built in functions converted to a common format between + * PyDML and DML for the runtime. + */ + public static class ConvertedDMLSyntax { + public final String namespace; + public final String functionName; + public final ArrayList<ParameterExpression> paramExpression; + public ConvertedDMLSyntax(String namespace, String functionName, + ArrayList<ParameterExpression> paramExpression) { + this.namespace = namespace; + this.functionName = functionName; + this.paramExpression = paramExpression; + } + }; + + /** + * Converts PyDML/DML built in functions to a common format for the runtime. 
+ * @param ctx + * @param namespace Namespace of the function + * @param functionName Name of the builtin function + * @param paramExpression Array of parameter names and values + * @param fnName Token of the built in function identifier + * @return + */ + protected abstract ConvertedDMLSyntax convertToDMLSyntax(ParserRuleContext ctx, String namespace, String functionName, ArrayList<ParameterExpression> paramExpression, + Token fnName); + + /** + * Hook overridden by the DML and PyDML validators to handle language-specific builtin functions. + * @param ctx parser rule context of the function call + * @param functionName name of the called function + * @param paramExpressions parameter expressions of the call + * @return instance of {@link Expression}; the caller in this class treats {@code null} as "not handled" + */ + protected abstract Expression handleLanguageSpecificFunction(ParserRuleContext ctx, String functionName, ArrayList<ParameterExpression> paramExpressions); + + /** Checks for builtin functions and, when one matches, passes the built expression to Action {@code f}. + * <p> + * The action constructs the appropriate {@link AssignmentStatement} when called from + * {@link CommonSyntacticValidator#functionCallAssignmentStatementHelper(ParserRuleContext, Set, Set, Expression, StatementInfo, Token, Token, String, String, ArrayList, boolean)} + * or assigns to an {@link Expression} when called from + * {@link DmlSyntacticValidator#exitBuiltinFunctionExpression(BuiltinFunctionExpressionContext)}. + * + * @param ctx parser rule context; its start token supplies the reported line and column + * @param functionName name of the candidate builtin function + * @param paramExpressions parameter expressions of the call + * @param f action executed with the builtin expression that was built + * @return true if a builtin function was found, or if building one failed and the error was reported; false otherwise + */ + protected boolean buildForBuiltInFunction(ParserRuleContext ctx, String functionName, ArrayList<ParameterExpression> paramExpressions, Action f) { + // In global namespace, so it can be a builtin function + // Double verification: verify passed function name is a (non-parameterized) built-in function. 
+ String fileName = currentFile; + int line = ctx.start.getLine(); + int col = ctx.start.getCharPositionInLine(); + try { + + Expression lsf = handleLanguageSpecificFunction(ctx, functionName, paramExpressions); + if (lsf != null){ + setFileLineColumn(lsf, ctx); + f.execute(lsf); + return true; + } + + BuiltinFunctionExpression bife = BuiltinFunctionExpression.getBuiltinFunctionExpression(functionName, paramExpressions, fileName, line, col, line, col); + if (bife != null){ + // It is a builtin function + f.execute(bife); + return true; + } + + ParameterizedBuiltinFunctionExpression pbife = ParameterizedBuiltinFunctionExpression.getParamBuiltinFunctionExpression(functionName, paramExpressions, fileName, line, col, line, col); + if (pbife != null){ + // It is a parameterized builtin function + f.execute(pbife); + return true; + } + + // built-in read, rand ... + DataExpression dbife = DataExpression.getDataExpression(functionName, paramExpressions, fileName, line, col, line, col); + if (dbife != null){ + f.execute(dbife); + return true; + } + } catch(Exception e) { + notifyErrorListeners("unable to process builtin function expression " + functionName + ":" + e.getMessage(), ctx.start); + return true; + } + return false; + } + + /** Builds the statement for a function call statement: print/output builtins are handled first (no lvalue needed); otherwise the call, builtin or not, is assigned to the single lvalue {@code dataInfo}, and a missing or invalid lvalue is reported as an error. */ + protected void functionCallAssignmentStatementHelper(final ParserRuleContext ctx, + Set<String> printStatements, Set<String> outputStatements, final Expression dataInfo, + final StatementInfo info, final Token nameToken, Token targetListToken, String namespace, + String functionName, ArrayList<ParameterExpression> paramExpression, boolean hasLHS) { + ConvertedDMLSyntax convertedSyntax = convertToDMLSyntax(ctx, namespace, functionName, paramExpression, nameToken); + if(convertedSyntax == null) { + return; + } + else { + namespace = convertedSyntax.namespace; + functionName = convertedSyntax.functionName; + paramExpression = convertedSyntax.paramExpression; + } + + // For builtin functions without LHS + 
if(namespace.equals(DMLProgram.DEFAULT_NAMESPACE)) { + if (printStatements.contains(functionName)){ + setPrintStatement(ctx, functionName, paramExpression, info); + return; + } + else if (outputStatements.contains(functionName)){ + setOutputStatement(ctx, paramExpression, info); + return; + } + } + + if (!hasLHS){ + notifyErrorListeners("function call needs to have lvalue (Quickfix: change it to \'tmpVar = " + functionName + "(...)\')", nameToken); + return; + } + + DataIdentifier target = null; + if(dataInfo instanceof DataIdentifier) { + target = (DataIdentifier) dataInfo; + } + else { + notifyErrorListeners("incorrect lvalue for function call ", targetListToken); + return; + } + + // For builtin functions with LHS + if(namespace.equals(DMLProgram.DEFAULT_NAMESPACE)){ + final DataIdentifier ftarget = target; + Action f = new Action() { + @Override public void execute(Expression e) { setAssignmentStatement(ctx, info , ftarget, e); } + }; + boolean validBIF = buildForBuiltInFunction(ctx, functionName, paramExpression, f); + if (validBIF) + return; + } + + // If builtin functions weren't found... 
+ FunctionCallIdentifier functCall = new FunctionCallIdentifier(paramExpression); + functCall.setFunctionName(functionName); + functCall.setFunctionNamespace(namespace); + + setAssignmentStatement(ctx, info, target, functCall); + } + + /** + * Callback through which {@link CommonSyntacticValidator#buildForBuiltInFunction} hands the built expression back to its caller, e.g. + * {@link CommonSyntacticValidator#functionCallAssignmentStatementHelper(ParserRuleContext, Set, Set, Expression, StatementInfo, Token, Token, String, String, ArrayList, boolean)} + */ + public static interface Action { + public void execute(Expression e); + } + + protected void setMultiAssignmentStatement(ArrayList<DataIdentifier> target, Expression expression, ParserRuleContext ctx, StatementInfo info) { + info.stmt = new MultiAssignmentStatement(target, expression); + info.stmt.setAllPositions(currentFile, ctx.start.getLine(), ctx.start.getCharPositionInLine(), ctx.start.getLine(), ctx.start.getCharPositionInLine()); + setFileLineColumn(info.stmt, ctx); + } + + // ----------------------------------------------------------------- + // End of Helper Functions for exit*FunctionCall*AssignmentStatement + // ----------------------------------------------------------------- + +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6017f069/src/main/java/org/apache/sysml/parser/common/ExpressionInfo.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/common/ExpressionInfo.java b/src/main/java/org/apache/sysml/parser/common/ExpressionInfo.java new file mode 100644 index 0000000..d266a33 --- /dev/null +++ b/src/main/java/org/apache/sysml/parser/common/ExpressionInfo.java @@ -0,0 +1,40 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.parser.common;
+
+import org.apache.sysml.parser.Expression;
+
+/**
+ * Mutable holder for an {@link Expression} and, for loops, its range expressions.
+ *
+ * <p>
+ * The ExpressionInfo and StatementInfo classes are shared among both parsers
+ * (R-like and Python-like dialects); the grammar header (see Dml.g4) imports
+ * them from this package for use by the Antlr-generated code.
+ */
+public class ExpressionInfo {
+
+	public Expression expr;
+
+	// Range of a loop; used only by for and parfor
+	public Expression from;
+	public Expression to;
+	public Expression increment;
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6017f069/src/main/java/org/apache/sysml/parser/common/StatementInfo.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/common/StatementInfo.java b/src/main/java/org/apache/sysml/parser/common/StatementInfo.java new file mode 100644 index 0000000..aad5a6a --- /dev/null +++ b/src/main/java/org/apache/sysml/parser/common/StatementInfo.java @@ -0,0 +1,47 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.parser.common;
+
+import java.util.HashMap;
+
+import org.apache.sysml.parser.DMLProgram;
+import org.apache.sysml.parser.Statement;
+
+/**
+ * Mutable holder for a {@link Statement}, plus import and function extras.
+ *
+ * <p>
+ * The ExpressionInfo and StatementInfo classes are shared among both parsers
+ * (R-like and Python-like dialects); the grammar header (see Dml.g4) imports
+ * them from this package for use by the Antlr-generated code.
+ */
+
+public class StatementInfo {
+
+	public Statement stmt;
+
+	// Valid only for import statements
+	public HashMap<String,DMLProgram> namespaces;
+
+	// Valid only for function statements;
+	// starts out as the empty string rather than null
+	public String functionName = "";
+
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6017f069/src/main/java/org/apache/sysml/parser/common/SyntacticErrorListener.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/common/SyntacticErrorListener.java b/src/main/java/org/apache/sysml/parser/common/SyntacticErrorListener.java new file mode 100644 index 0000000..2a669d8 --- /dev/null +++ b/src/main/java/org/apache/sysml/parser/common/SyntacticErrorListener.java @@ -0,0 +1,111 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysml.parser.common;
+
+import org.antlr.v4.runtime.BaseErrorListener;
+import org.antlr.v4.runtime.RecognitionException;
+import org.antlr.v4.runtime.Recognizer;
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysml.api.DMLScript;
+
+/** Container for the error listener used by the parser wrappers (e.g. DMLParserWrapper). */
+public class SyntacticErrorListener {
+
+	private static final Log LOG = LogFactory.getLog(DMLScript.class.getName());
+
+	/**
+	 * Logs syntax errors, validation errors and validation warnings, prefixed
+	 * with the current file name when one is set, and records whether at
+	 * least one error (as opposed to a warning) was reported.
+	 */
+	public static class CustomErrorListener extends BaseErrorListener {
+
+		private boolean atleastOneError = false;
+		private String currentFileName = null;
+
+		/** Sets the file name that prefixes subsequently logged messages. */
+		public void setCurrentFileName(String currentFilePath) {
+			currentFileName = currentFilePath;
+		}
+
+		/** @return the current file name, or {@code null} if none is set */
+		public String getCurrentFileName() {
+			return currentFileName;
+		}
+
+		/** Clears the file name; later messages are logged without a file prefix. */
+		public void unsetCurrentFileName() {
+			currentFileName = null;
+		}
+
+		/** Builds "line L:C msg", prefixed with the current file name and a space when one is set. */
+		private String formatMessage(int line, int charPositionInLine, String msg) {
+			String located = "line " + line + ":" + charPositionInLine + " " + msg;
+			return (currentFileName == null) ? located : currentFileName + " " + located;
+		}
+
+		/** Sets the error flag and logs the formatted message at error level. */
+		private void logError(int line, int charPositionInLine, String msg) {
+			try {
+				setAtleastOneError(true);
+				LOG.error(formatMessage(line, charPositionInLine, msg));
+			}
+			catch(Exception e1) {
+				// Best effort: a failure while reporting is itself only logged
+				LOG.error("ERROR: while customizing error message:" + e1, e1);
+			}
+		}
+
+		/** Reports a validation error and sets the error flag. */
+		public void validationError(int line, int charPositionInLine, String msg) {
+			logError(line, charPositionInLine, msg);
+		}
+
+		/** Logs a validation warning; warnings do not set the error flag. */
+		public void validationWarning(int line, int charPositionInLine, String msg) {
+			try {
+				LOG.warn(formatMessage(line, charPositionInLine, msg));
+			}
+			catch(Exception e1) {
+				LOG.warn("ERROR: while customizing error message:" + e1, e1);
+			}
+		}
+
+		/** Antlr callback for syntax errors; handled like a validation error. */
+		@Override
+		public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol,
+				int line, int charPositionInLine,
+				String msg, RecognitionException e)
+		{
+			logError(line, charPositionInLine, msg);
+		}
+
+		/** @return the error flag: true once at least one error was reported */
+		public boolean isAtleastOneError() {
+			return atleastOneError;
+		}
+
+		/** Sets the error flag explicitly. */
+		public void setAtleastOneError(boolean atleastOneError) {
+			this.atleastOneError = atleastOneError;
+		}
+	}
+}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6017f069/src/main/java/org/apache/sysml/parser/dml/DMLParserWrapper.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/dml/DMLParserWrapper.java b/src/main/java/org/apache/sysml/parser/dml/DMLParserWrapper.java index b64f7d2..7af95dd 100644 --- a/src/main/java/org/apache/sysml/parser/dml/DMLParserWrapper.java +++ b/src/main/java/org/apache/sysml/parser/dml/DMLParserWrapper.java @@ -19,48 +19,34 @@ package org.apache.sysml.parser.dml; -import java.io.BufferedReader; import java.io.ByteArrayInputStream; import java.io.FileNotFoundException; -import java.io.FileReader; import java.io.IOException; import 
java.io.InputStream; -import java.io.InputStreamReader; import java.util.HashMap; import java.util.Map; +import org.antlr.v4.runtime.ANTLRInputStream; import org.antlr.v4.runtime.BailErrorStrategy; +import org.antlr.v4.runtime.CommonTokenStream; import org.antlr.v4.runtime.DefaultErrorStrategy; import org.antlr.v4.runtime.atn.PredictionMode; import org.antlr.v4.runtime.misc.ParseCancellationException; +import org.antlr.v4.runtime.tree.ParseTree; import org.antlr.v4.runtime.tree.ParseTreeWalker; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; -import org.apache.hadoop.fs.FileSystem; -import org.apache.hadoop.fs.Path; import org.apache.sysml.api.DMLScript; -import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.parser.AParserWrapper; import org.apache.sysml.parser.DMLProgram; -import org.apache.sysml.parser.ForStatement; -import org.apache.sysml.parser.ForStatementBlock; import org.apache.sysml.parser.FunctionStatementBlock; -import org.apache.sysml.parser.IfStatement; -import org.apache.sysml.parser.IfStatementBlock; import org.apache.sysml.parser.ImportStatement; import org.apache.sysml.parser.LanguageException; -import org.apache.sysml.parser.ParForStatement; -import org.apache.sysml.parser.ParForStatementBlock; import org.apache.sysml.parser.ParseException; -import org.apache.sysml.parser.Statement; -import org.apache.sysml.parser.StatementBlock; -import org.apache.sysml.parser.WhileStatement; -import org.apache.sysml.parser.WhileStatementBlock; -import org.apache.sysml.parser.dml.DmlParser.DmlprogramContext; +import org.apache.sysml.parser.common.SyntacticErrorListener.CustomErrorListener; import org.apache.sysml.parser.dml.DmlParser.FunctionStatementContext; +import org.apache.sysml.parser.dml.DmlParser.ProgramrootContext; import org.apache.sysml.parser.dml.DmlParser.StatementContext; -import org.apache.sysml.parser.dml.DmlSyntacticErrorListener.CustomDmlErrorListener; -import 
org.apache.sysml.runtime.util.LocalFileUtils; /** * This is the main entry point for the Antlr4 parser. @@ -76,7 +62,7 @@ import org.apache.sysml.runtime.util.LocalFileUtils; * * To separate logic of semantic validation, DmlSyntaticValidatorHelper contains functions that do semantic validation. Currently, there is no semantic validation as most of it is delegated to subsequent validation phase. * - * Whenever there is a parse error, it goes through DmlSyntacticErrorListener. This allows us to pipe the error messages to any future pipeline as well as control the format in an elegant manner. + * Whenever there is a parse error, it goes through SyntacticErrorListener. This allows us to pipe the error messages to any future pipeline as well as control the format in an elegant manner. * There are three types of messages passed: * - Syntactic errors: When passed DML script doesnot conform to syntatic structure enforced by Dml.g4 * - Validation errors: Errors due to translation of AST to DMLProgram @@ -91,37 +77,6 @@ public class DMLParserWrapper extends AParserWrapper private static final Log LOG = LogFactory.getLog(DMLScript.class.getName()); /** - * Custom wrapper to convert statement into statement blocks. Called by doParse and in DmlSyntacticValidator for for, parfor, while, ... 
- * @param current a statement - * @return corresponding statement block - */ - public static StatementBlock getStatementBlock(Statement current) { - StatementBlock blk = null; - if(current instanceof ParForStatement) { - blk = new ParForStatementBlock(); - blk.addStatement(current); - } - else if(current instanceof ForStatement) { - blk = new ForStatementBlock(); - blk.addStatement(current); - } - else if(current instanceof IfStatement) { - blk = new IfStatementBlock(); - blk.addStatement(current); - } - else if(current instanceof WhileStatement) { - blk = new WhileStatementBlock(); - blk.addStatement(current); - } - else { - // This includes ImportStatement - blk = new StatementBlock(); - blk.addStatement(current); - } - return blk; - } - - /** * Parses the passed file with command line parameters. You can either pass both (local file) or just dmlScript (hdfs) or just file name (import command) * @param fileName either full path or null --> only used for better error handling * @param dmlScript required @@ -130,25 +85,21 @@ public class DMLParserWrapper extends AParserWrapper * @throws ParseException */ @Override - public DMLProgram parse(String fileName, String dmlScript, HashMap<String,String> argVals) - throws ParseException - { + public DMLProgram parse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException { DMLProgram prog = null; if(dmlScript == null || dmlScript.trim().isEmpty()) { throw new ParseException("Incorrect usage of parse. Please pass dmlScript not just filename"); } - // Set the pipeline required for ANTLR parsing - DMLParserWrapper parser = new DMLParserWrapper(); - prog = parser.doParse(fileName, dmlScript, argVals); + prog = doParse(fileName, dmlScript, argVals); if(prog == null) { throw new ParseException("One or more errors found during parsing (could not construct AST for file: " + fileName + "). 
Cannot proceed ahead."); } return prog; } - + /** * This function is supposed to be called directly only from DmlSyntacticValidator when it encounters 'import' * @param fileName @@ -157,20 +108,14 @@ public class DMLParserWrapper extends AParserWrapper public DMLProgram doParse(String fileName, String dmlScript, HashMap<String,String> argVals) throws ParseException { DMLProgram dmlPgm = null; - org.antlr.v4.runtime.ANTLRInputStream in; + ANTLRInputStream in; try { if(dmlScript == null) { - dmlScript = readDMLScript(fileName); + dmlScript = readDMLScript(fileName, LOG); } InputStream stream = new ByteArrayInputStream(dmlScript.getBytes()); - in = new org.antlr.v4.runtime.ANTLRInputStream(stream); -// else { -// if(!(new File(fileName)).exists()) { -// throw new ParseException("ERROR: Cannot open file:" + fileName); -// } -// in = new org.antlr.v4.runtime.ANTLRInputStream(new java.io.FileInputStream(fileName)); -// } + in = new ANTLRInputStream(stream); } catch (FileNotFoundException e) { throw new ParseException("ERROR: Cannot find file:" + fileName, e); } catch (IOException e) { @@ -179,12 +124,12 @@ public class DMLParserWrapper extends AParserWrapper throw new ParseException("ERROR: " + e.getMessage(), e); } - DmlprogramContext ast = null; - CustomDmlErrorListener errorListener = new CustomDmlErrorListener(); + ProgramrootContext ast = null; + CustomErrorListener errorListener = new CustomErrorListener(); try { DmlLexer lexer = new DmlLexer(in); - org.antlr.v4.runtime.CommonTokenStream tokens = new org.antlr.v4.runtime.CommonTokenStream(lexer); + CommonTokenStream tokens = new CommonTokenStream(lexer); DmlParser antlr4Parser = new DmlParser(tokens); boolean tryOptimizedParsing = false; // For now no optimization, since it is not able to parse integer value. 
@@ -195,7 +140,7 @@ public class DMLParserWrapper extends AParserWrapper antlr4Parser.removeErrorListeners(); antlr4Parser.setErrorHandler(new BailErrorStrategy()); try{ - ast = antlr4Parser.dmlprogram(); + ast = antlr4Parser.programroot(); // If successful, no need to try out full LL(*) ... SLL was enough } catch(ParseCancellationException ex) { @@ -203,28 +148,28 @@ public class DMLParserWrapper extends AParserWrapper tokens.reset(); antlr4Parser.reset(); if(fileName != null) { - errorListener.pushCurrentFileName(fileName); - // DmlSyntacticErrorListener.currentFileName.push(fileName); + errorListener.setCurrentFileName(fileName); + // SyntacticErrorListener.currentFileName.push(fileName); } else { - errorListener.pushCurrentFileName("MAIN_SCRIPT"); - // DmlSyntacticErrorListener.currentFileName.push("MAIN_SCRIPT"); + errorListener.setCurrentFileName("MAIN_SCRIPT"); + // SyntacticErrorListener.currentFileName.push("MAIN_SCRIPT"); } // Set our custom error listener antlr4Parser.addErrorListener(errorListener); antlr4Parser.setErrorHandler(new DefaultErrorStrategy()); antlr4Parser.getInterpreter().setPredictionMode(PredictionMode.LL); - ast = antlr4Parser.dmlprogram(); + ast = antlr4Parser.programroot(); } } else { // Set our custom error listener antlr4Parser.removeErrorListeners(); antlr4Parser.addErrorListener(errorListener); - errorListener.pushCurrentFileName(fileName); + errorListener.setCurrentFileName(fileName); // Now do the parsing - ast = antlr4Parser.dmlprogram(); + ast = antlr4Parser.programroot(); } } catch(Exception e) { @@ -235,13 +180,12 @@ public class DMLParserWrapper extends AParserWrapper try { // Now convert the parse tree into DMLProgram // Do syntactic validation while converting - org.antlr.v4.runtime.tree.ParseTree tree = ast; + ParseTree tree = ast; // And also do syntactic validation - org.antlr.v4.runtime.tree.ParseTreeWalker walker = new ParseTreeWalker(); - DmlSyntacticValidatorHelper helper = new 
DmlSyntacticValidatorHelper(errorListener); - DmlSyntacticValidator validator = new DmlSyntacticValidator(helper, errorListener.peekFileName(), argVals); + ParseTreeWalker walker = new ParseTreeWalker(); + DmlSyntacticValidator validator = new DmlSyntacticValidator(errorListener, argVals); walker.walk(validator, tree); - errorListener.popFileName(); + errorListener.unsetCurrentFileName(); if(errorListener.isAtleastOneError()) { return null; } @@ -254,7 +198,7 @@ public class DMLParserWrapper extends AParserWrapper return dmlPgm; } - private DMLProgram createDMLProgram(DmlprogramContext ast) { + private DMLProgram createDMLProgram(ProgramrootContext ast) { DMLProgram dmlPgm = new DMLProgram(); @@ -328,58 +272,5 @@ public class DMLParserWrapper extends AParserWrapper return dmlPgm; } - public static String readDMLScript( String script ) - throws IOException, LanguageException - { - String dmlScriptStr = null; - - //read DML script from file - if(script == null) - throw new LanguageException("DML script path was not specified!"); - - StringBuilder sb = new StringBuilder(); - BufferedReader in = null; - try - { - //read from hdfs or gpfs file system - if( script.startsWith("hdfs:") - || script.startsWith("gpfs:") ) - { - if( !LocalFileUtils.validateExternalFilename(script, true) ) - throw new LanguageException("Invalid (non-trustworthy) hdfs filename."); - FileSystem fs = FileSystem.get(ConfigurationManager.getCachedJobConf()); - Path scriptPath = new Path(script); - in = new BufferedReader(new InputStreamReader(fs.open(scriptPath))); - } - // from local file system - else - { - if( !LocalFileUtils.validateExternalFilename(script, false) ) - throw new LanguageException("Invalid (non-trustworthy) local filename."); - in = new BufferedReader(new FileReader(script)); - } - - //core script reading - String tmp = null; - while ((tmp = in.readLine()) != null) - { - sb.append( tmp ); - sb.append( "\n" ); - } - } - catch (IOException ex) - { - LOG.error("Failed to read the 
script from the file system", ex); - throw ex; - } - finally - { - if( in != null ) - in.close(); - } - - dmlScriptStr = sb.toString(); - - return dmlScriptStr; - } + } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6017f069/src/main/java/org/apache/sysml/parser/dml/Dml.g4 ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/dml/Dml.g4 b/src/main/java/org/apache/sysml/parser/dml/Dml.g4 index 0c4e62c..a07aece 100644 --- a/src/main/java/org/apache/sysml/parser/dml/Dml.g4 +++ b/src/main/java/org/apache/sysml/parser/dml/Dml.g4 @@ -6,9 +6,9 @@ * to you under the Apache License, Version 2.0 (the * "License"); you may not use this file except in compliance * with the License. You may obtain a copy of the License at - * + * * http://www.apache.org/licenses/LICENSE-2.0 - * + * * Unless required by applicable law or agreed to in writing, * software distributed under the License is distributed on an * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY @@ -39,11 +39,13 @@ grammar Dml; * specific language governing permissions and limitations * under the License. */ + import org.apache.sysml.parser.common.ExpressionInfo; + import org.apache.sysml.parser.common.StatementInfo; } // DML Program is a list of expression // For now, we only allow global function definitions (not nested or inside a while block) -dmlprogram: (blocks+=statement | functionBlocks+=functionStatement)* EOF; +programroot: (blocks+=statement | functionBlocks+=functionStatement)* EOF; statement returns [ StatementInfo info ] @init { @@ -59,13 +61,13 @@ statement returns [ StatementInfo info ] // For backward compatibility and also since the behavior of foo() * A + foo() ... where foo returns A // Convert FunctionCallIdentifier(paramExprs, ..) -> source | // TODO: Throw an informative error if user doesnot provide the optional assignment - ( targetList+=dataIdentifier ('='|'<-') )? 
name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # FunctionCallAssignmentStatement + ( targetList=dataIdentifier ('='|'<-') )? name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # FunctionCallAssignmentStatement | '[' targetList+=dataIdentifier (',' targetList+=dataIdentifier)* ']' ('='|'<-') name=ID '(' (paramExprs+=parameterizedExpression (',' paramExprs+=parameterizedExpression)* )? ')' ';'* # FunctionCallMultiAssignmentStatement // {notifyErrorListeners("Too many parentheses");} // ------------------------------------------ // AssignmentStatement - | targetList+=dataIdentifier op=('<-'|'=') 'ifdef' '(' commandLineParam=dataIdentifier ',' source=expression ')' ';'* # IfdefAssignmentStatement - | targetList+=dataIdentifier op=('<-'|'=') source=expression ';'* # AssignmentStatement + | targetList=dataIdentifier op=('<-'|'=') 'ifdef' '(' commandLineParam=dataIdentifier ',' source=expression ')' ';'* # IfdefAssignmentStatement + | targetList=dataIdentifier op=('<-'|'=') source=expression ';'* # AssignmentStatement // ------------------------------------------ // We don't support block statement // | '{' body+=expression ';'* ( body+=expression ';'* )* '}' # BlockStatement http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6017f069/src/main/java/org/apache/sysml/parser/dml/DmlSyntacticErrorListener.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/parser/dml/DmlSyntacticErrorListener.java b/src/main/java/org/apache/sysml/parser/dml/DmlSyntacticErrorListener.java deleted file mode 100644 index 027ea08..0000000 --- a/src/main/java/org/apache/sysml/parser/dml/DmlSyntacticErrorListener.java +++ /dev/null @@ -1,115 +0,0 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. 
See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.parser.dml; - -import org.antlr.v4.runtime.BaseErrorListener; -import org.antlr.v4.runtime.RecognitionException; -import org.antlr.v4.runtime.Recognizer; -import org.apache.commons.logging.Log; -import org.apache.commons.logging.LogFactory; - -import org.apache.sysml.api.DMLScript; - -import java.util.Stack; - -public class DmlSyntacticErrorListener { - - - private static final Log LOG = LogFactory.getLog(DMLScript.class.getName()); - - public static class CustomDmlErrorListener extends BaseErrorListener { - - private boolean atleastOneError = false; - private Stack<String> currentFileName = new Stack<String>(); - - public void pushCurrentFileName(String currentFilePath) { - currentFileName.push(currentFilePath); - } - - public String peekFileName() { - return currentFileName.peek(); - } - - public String popFileName() { - return currentFileName.pop(); - } - - public void validationError(int line, int charPositionInLine, String msg) { - try { - setAtleastOneError(true); - // Print error messages with file name - if(currentFileName == null || currentFileName.empty()) { - LOG.error("line "+line+":"+charPositionInLine+" "+msg); - } - else { - String fileName = currentFileName.peek(); - LOG.error(fileName + " line "+line+":"+charPositionInLine+" 
"+msg); - } - } - catch(Exception e1) { - LOG.error("ERROR: while customizing error message:" + e1); - } - } - - public void validationWarning(int line, int charPositionInLine, String msg) { - try { - //atleastOneError = true; ---> not an error, just warning - // Print error messages with file name - if(currentFileName == null || currentFileName.empty()) - LOG.warn("line "+line+":"+charPositionInLine+" "+msg); - else { - String fileName = currentFileName.peek(); - LOG.warn(fileName + " line "+line+":"+charPositionInLine+" "+msg); - } - } - catch(Exception e1) { - LOG.warn("ERROR: while customizing error message:" + e1); - } - } - - @Override - public void syntaxError(Recognizer<?, ?> recognizer, Object offendingSymbol, - int line, int charPositionInLine, - String msg, RecognitionException e) - { - try { - setAtleastOneError(true); - // Print error messages with file name - if(currentFileName == null || currentFileName.empty()) - LOG.error("line "+line+":"+charPositionInLine+" "+msg); - else { - String fileName = currentFileName.peek(); - LOG.error(fileName + " line "+line+":"+charPositionInLine+" "+msg); - } - } - catch(Exception e1) { - LOG.error("ERROR: while customizing error message:" + e1); - } - } - - public boolean isAtleastOneError() { - return atleastOneError; - } - - public void setAtleastOneError(boolean atleastOneError) { - this.atleastOneError = atleastOneError; - } - } -}
