Repository: incubator-systemml Updated Branches: refs/heads/master b4d9dcdaa -> 2f9769e20
[SYSTEMML-568] Frame New MLContext support Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/2f9769e2 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/2f9769e2 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/2f9769e2 Branch: refs/heads/master Commit: 2f9769e209aef4a17ab8fbdf4d9b1175a5512a60 Parents: b4d9dcd Author: Arvind Surve <[email protected]> Authored: Wed Aug 31 16:21:32 2016 -0700 Committer: Arvind Surve <[email protected]> Committed: Wed Aug 31 16:21:32 2016 -0700 ---------------------------------------------------------------------- .../java/org/apache/sysml/api/MLContext.java | 15 +- .../apache/sysml/api/mlcontext/MLContext.java | 84 ++-- .../api/mlcontext/MLContextConversionUtil.java | 423 ++++++++++++++++++- .../sysml/api/mlcontext/MLContextUtil.java | 77 +++- .../apache/sysml/api/mlcontext/MLResults.java | 81 ++++ .../org/apache/sysml/api/mlcontext/Script.java | 50 ++- .../sysml/api/mlcontext/ScriptExecutor.java | 2 +- .../spark/ReblockSPInstruction.java | 21 + .../integration/mlcontext/MLContextTest.java | 322 ++++++++++++++ .../org/apache/sysml/api/mlcontext/FrameA.csv | 3 + .../apache/sysml/api/mlcontext/FrameA.csv.mtd | 13 + .../org/apache/sysml/api/mlcontext/FrameA.ijv | 12 + .../apache/sysml/api/mlcontext/FrameA.ijv.mtd | 13 + .../org/apache/sysml/api/mlcontext/FrameB.csv | 2 + .../apache/sysml/api/mlcontext/FrameB.csv.mtd | 13 + .../org/apache/sysml/api/mlcontext/FrameB.ijv | 6 + .../apache/sysml/api/mlcontext/FrameB.ijv.mtd | 13 + 17 files changed, 1079 insertions(+), 71 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/main/java/org/apache/sysml/api/MLContext.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/MLContext.java 
b/src/main/java/org/apache/sysml/api/MLContext.java index 297d3a5..649ffee 100644 --- a/src/main/java/org/apache/sysml/api/MLContext.java +++ b/src/main/java/org/apache/sysml/api/MLContext.java @@ -583,25 +583,21 @@ public class MLContext { if(_inVarnames == null) _inVarnames = new ArrayList<String>(); - //FIXME: MB why does the register input for frames implicitly convert the data to binary block, - //while the register input for matrices does not? FIXME - JavaPairRDD<LongWritable, Text> rddText = rddIn.mapToPair(new ConvertStringToLongTextPair()); int blksz = ConfigurationManager.getBlocksize(); MatrixCharacteristics mc = new MatrixCharacteristics(rlen, clen, blksz, blksz, nnz); - FrameObject fo = new FrameObject(null, new MatrixFormatMetaData(mc, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo)); - JavaPairRDD<Long, FrameBlock> rdd = null; + FrameObject fo = null; if( format.equals("csv") ) { CSVFileFormatProperties csvprops = (props!=null) ? (CSVFileFormatProperties)props: new CSVFileFormatProperties(); - rdd = FrameRDDConverterUtils.csvToBinaryBlock(new JavaSparkContext(getSparkContext()), - rddText, mc, csvprops.hasHeader(), csvprops.getDelim(), csvprops.isFill(), csvprops.getFillValue()); + fo = new FrameObject(null, new MatrixFormatMetaData(mc, OutputInfo.CSVOutputInfo, InputInfo.CSVInputInfo)); + fo.setFileFormatProperties(csvprops); } else if( format.equals("text") ) { if(rlen == -1 || clen == -1) { throw new DMLRuntimeException("The metadata is required in registerInput for format:" + format); } - rdd = FrameRDDConverterUtils.textCellToBinaryBlock(new JavaSparkContext(getSparkContext()), rddText, mc, schema); + fo = new FrameObject(null, new MatrixFormatMetaData(mc, OutputInfo.TextCellOutputInfo, InputInfo.TextCellInputInfo)); } else { @@ -610,7 +606,8 @@ public class MLContext { if(props != null) fo.setFileFormatProperties(props); - fo.setRDDHandle(new RDDObject(rdd, varName)); + fo.setRDDHandle(new RDDObject(rddText, varName)); + 
fo.setSchema("String"); //TODO fix schema _variables.put(varName, fo); _inVarnames.add(varName); checkIfRegisteringInputAllowed(); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java index 8f809f8..be37575 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContext.java @@ -377,47 +377,55 @@ public class MLContext { DataExpression exp = (DataExpression) source; // Do not check metadata file for registered reads exp.setCheckMetadata(false); - - MatrixObject mo = getMatrixObject(target); - if (mo != null) { - int blp = source.getBeginLine(); - int bcp = source.getBeginColumn(); - int elp = source.getEndLine(); - int ecp = source.getEndColumn(); - exp.addVarParam(DataExpression.READROWPARAM, - new IntIdentifier(mo.getNumRows(), source.getFilename(), blp, bcp, elp, ecp)); - exp.addVarParam(DataExpression.READCOLPARAM, - new IntIdentifier(mo.getNumColumns(), source.getFilename(), blp, bcp, elp, ecp)); - exp.addVarParam(DataExpression.READNUMNONZEROPARAM, - new IntIdentifier(mo.getNnz(), source.getFilename(), blp, bcp, elp, ecp)); - exp.addVarParam(DataExpression.DATATYPEPARAM, new StringIdentifier("matrix", source.getFilename(), - blp, bcp, elp, ecp)); - exp.addVarParam(DataExpression.VALUETYPEPARAM, new StringIdentifier("double", source.getFilename(), - blp, bcp, elp, ecp)); - - if (mo.getMetaData() instanceof MatrixFormatMetaData) { - MatrixFormatMetaData metaData = (MatrixFormatMetaData) mo.getMetaData(); - if (metaData.getOutputInfo() == OutputInfo.CSVOutputInfo) { - exp.addVarParam(DataExpression.FORMAT_TYPE, new StringIdentifier( - DataExpression.FORMAT_TYPE_VALUE_CSV, source.getFilename(), blp, bcp, elp, 
ecp)); - } else if (metaData.getOutputInfo() == OutputInfo.TextCellOutputInfo) { - exp.addVarParam(DataExpression.FORMAT_TYPE, new StringIdentifier( - DataExpression.FORMAT_TYPE_VALUE_TEXT, source.getFilename(), blp, bcp, elp, ecp)); - } else if (metaData.getOutputInfo() == OutputInfo.BinaryBlockOutputInfo) { - exp.addVarParam( - DataExpression.ROWBLOCKCOUNTPARAM, - new IntIdentifier(mo.getNumRowsPerBlock(), source.getFilename(), blp, bcp, elp, ecp)); - exp.addVarParam(DataExpression.COLUMNBLOCKCOUNTPARAM, - new IntIdentifier(mo.getNumColumnsPerBlock(), source.getFilename(), blp, bcp, elp, - ecp)); - exp.addVarParam(DataExpression.FORMAT_TYPE, new StringIdentifier( - DataExpression.FORMAT_TYPE_VALUE_BINARY, source.getFilename(), blp, bcp, elp, ecp)); - } else { - throw new MLContextException("Unsupported format through MLContext"); + + //Value retured from getVarParam is of type stringidentifier at runtime, but at compile type its Expression + //Could not find better way to compare this condition. 
+ Expression datatypeExp = ((DataExpression)source).getVarParam("data_type"); + String datatype = "matrix"; + if(datatypeExp != null) + datatype = datatypeExp.toString(); + + if(datatype.compareToIgnoreCase("frame") != 0) { + MatrixObject mo = getMatrixObject(target); + if (mo != null) { + int blp = source.getBeginLine(); + int bcp = source.getBeginColumn(); + int elp = source.getEndLine(); + int ecp = source.getEndColumn(); + exp.addVarParam(DataExpression.READROWPARAM, + new IntIdentifier(mo.getNumRows(), source.getFilename(), blp, bcp, elp, ecp)); + exp.addVarParam(DataExpression.READCOLPARAM, + new IntIdentifier(mo.getNumColumns(), source.getFilename(), blp, bcp, elp, ecp)); + exp.addVarParam(DataExpression.READNUMNONZEROPARAM, + new IntIdentifier(mo.getNnz(), source.getFilename(), blp, bcp, elp, ecp)); + exp.addVarParam(DataExpression.DATATYPEPARAM, new StringIdentifier("matrix", source.getFilename(), + blp, bcp, elp, ecp)); + exp.addVarParam(DataExpression.VALUETYPEPARAM, new StringIdentifier("double", source.getFilename(), + blp, bcp, elp, ecp)); + + if (mo.getMetaData() instanceof MatrixFormatMetaData) { + MatrixFormatMetaData metaData = (MatrixFormatMetaData) mo.getMetaData(); + if (metaData.getOutputInfo() == OutputInfo.CSVOutputInfo) { + exp.addVarParam(DataExpression.FORMAT_TYPE, new StringIdentifier( + DataExpression.FORMAT_TYPE_VALUE_CSV, source.getFilename(), blp, bcp, elp, ecp)); + } else if (metaData.getOutputInfo() == OutputInfo.TextCellOutputInfo) { + exp.addVarParam(DataExpression.FORMAT_TYPE, new StringIdentifier( + DataExpression.FORMAT_TYPE_VALUE_TEXT, source.getFilename(), blp, bcp, elp, ecp)); + } else if (metaData.getOutputInfo() == OutputInfo.BinaryBlockOutputInfo) { + exp.addVarParam( + DataExpression.ROWBLOCKCOUNTPARAM, + new IntIdentifier(mo.getNumRowsPerBlock(), source.getFilename(), blp, bcp, elp, ecp)); + exp.addVarParam(DataExpression.COLUMNBLOCKCOUNTPARAM, + new IntIdentifier(mo.getNumColumnsPerBlock(), source.getFilename(), blp, 
bcp, elp, + ecp)); + exp.addVarParam(DataExpression.FORMAT_TYPE, new StringIdentifier( + DataExpression.FORMAT_TYPE_VALUE_BINARY, source.getFilename(), blp, bcp, elp, ecp)); + } else { + throw new MLContextException("Unsupported format through MLContext"); + } } } } - } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java index 63de638..7feb86a 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java @@ -22,6 +22,7 @@ package org.apache.sysml.api.mlcontext; import java.io.InputStream; import java.net.URL; import java.util.ArrayList; +import java.util.Collections; import java.util.Iterator; import java.util.List; @@ -50,6 +51,7 @@ import org.apache.sysml.runtime.instructions.spark.data.RDDObject; import org.apache.sysml.runtime.instructions.spark.functions.ConvertStringToLongTextPair; import org.apache.sysml.runtime.instructions.spark.functions.CopyBlockPairFunction; import org.apache.sysml.runtime.instructions.spark.functions.CopyTextInputFunction; +import org.apache.sysml.runtime.instructions.spark.utils.FrameRDDConverterUtils; import org.apache.sysml.runtime.instructions.spark.utils.RDDAggregateUtils; import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtilsExt; import org.apache.sysml.runtime.instructions.spark.utils.RDDConverterUtilsExt.DataFrameAnalysisFunction; @@ -200,7 +202,7 @@ public class MLContextConversionUtil { * the matrix metadata * @return the {@code FrameBlock} converted to a {@code FrameObject} */ - public static FrameObject frameBlockToframeObject(String variableName, FrameBlock frameBlock, + 
public static FrameObject frameBlockToFrameObject(String variableName, FrameBlock frameBlock, MatrixMetadata matrixMetadata) { try { MatrixCharacteristics matrixCharacteristics; @@ -216,7 +218,7 @@ public class MLContextConversionUtil { frameObject.release(); return frameObject; } catch (CacheException e) { - throw new MLContextException("Exception converting MatrixBlock to MatrixObject", e); + throw new MLContextException("Exception converting FrameBlock to FrameObject", e); } } @@ -271,6 +273,55 @@ public class MLContextConversionUtil { } /** + * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a + * {@code FrameObject}. + * + * @param variableName + * name of the variable associated with the frame + * @param binaryBlocks + * {@code JavaPairRDD<Long, FrameBlock>} representation + * of a binary-block frame + * @return the {@code JavaPairRDD<Long, FrameBlock>} frame + * converted to a {@code FrameObject} + */ + public static FrameObject binaryBlocksToFrameObject(String variableName, + JavaPairRDD<Long, FrameBlock> binaryBlocks) { + return binaryBlocksToFrameObject(variableName, binaryBlocks, null); + } + + /** + * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a + * {@code FrameObject}. 
+ * + * @param variableName + * name of the variable associated with the frame + * @param binaryBlocks + * {@code JavaPairRDD<Long, FrameBlock>} representation + * of a binary-block frame + * @param matrixMetadata + * the matrix metadata + * @return the {@code JavaPairRDD<Long, FrameBlock>} frame + * converted to a {@code FrameObject} + */ + public static FrameObject binaryBlocksToFrameObject(String variableName, + JavaPairRDD<Long, FrameBlock> binaryBlocks, MatrixMetadata matrixMetadata) { + + MatrixCharacteristics matrixCharacteristics; + if (matrixMetadata != null) { + matrixCharacteristics = matrixMetadata.asMatrixCharacteristics(); + } else { + matrixCharacteristics = new MatrixCharacteristics(); + } + + MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, + OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo); + FrameObject frameObject = new FrameObject(MLContextUtil.scratchSpace() + "/" + "temp_" + + System.nanoTime() + variableName, mtd); + frameObject.setRDDHandle(new RDDObject(binaryBlocks, variableName)); + return frameObject; + } + + /** * Convert a {@code DataFrame} to a {@code MatrixObject}. * * @param variableName @@ -309,6 +360,52 @@ public class MLContextConversionUtil { } /** + * Convert a {@code DataFrame} to a {@code FrameObject}. + * + * @param variableName + * name of the variable associated with the frame + * @param dataFrame + * the Spark {@code DataFrame} + * @return the {@code DataFrame} matrix converted to a converted to a + * {@code FrameObject} + * @throws DMLRuntimeException + */ + public static FrameObject dataFrameToFrameObject(String variableName, DataFrame dataFrame) throws DMLRuntimeException { + return dataFrameToFrameObject(variableName, dataFrame, null); + } + + /** + * Convert a {@code DataFrame} to a {@code FrameObject}. 
+ * + * @param variableName + * name of the variable associated with the frame + * @param dataFrame + * the Spark {@code DataFrame} + * @param matrixMetadata + * the matrix metadata + * @return the {@code DataFrame} frame converted to a converted to a + * {@code FrameObject} + * @throws DMLRuntimeException + */ + public static FrameObject dataFrameToFrameObject(String variableName, DataFrame dataFrame, + MatrixMetadata matrixMetadata) throws DMLRuntimeException { + if (matrixMetadata == null) { + matrixMetadata = new MatrixMetadata(); + } + + JavaSparkContext javaSparkContext = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); + boolean containsID = isDataFrameWithIDColumn(matrixMetadata); + MatrixCharacteristics matrixCharacteristics = matrixMetadata.asMatrixCharacteristics(); + JavaPairRDD<Long, FrameBlock> binaryBlock = + FrameRDDConverterUtils.dataFrameToBinaryBlock(javaSparkContext, dataFrame, + matrixCharacteristics, containsID); + + FrameObject frameObject = MLContextConversionUtil.binaryBlocksToFrameObject(variableName, binaryBlock, + matrixMetadata); + return frameObject; + } + + /** * Convert a {@code DataFrame} to a * {@code JavaPairRDD<MatrixIndexes, MatrixBlock>} binary-block matrix. 
* @@ -529,6 +626,55 @@ public class MLContextConversionUtil { } /** + * Convert a {@code JavaRDD<String>} in CSV format to a {@code FrameObject} + * + * @param variableName + * name of the variable associated with the frame + * @param javaRDD + * the Java RDD of strings + * @return the {@code JavaRDD<String>} converted to a {@code FrameObject} + */ + public static FrameObject javaRDDStringCSVToFrameObject(String variableName, JavaRDD<String> javaRDD) { + return javaRDDStringCSVToFrameObject(variableName, javaRDD, null); + } + + /** + * Convert a {@code JavaRDD<String>} in CSV format to a {@code FrameObject} + * + * @param variableName + * name of the variable associated with the frame + * @param javaRDD + * the Java RDD of strings + * @param matrixMetadata + * matrix metadata + * @return the {@code JavaRDD<String>} converted to a {@code FrameObject} + */ + public static FrameObject javaRDDStringCSVToFrameObject(String variableName, JavaRDD<String> javaRDD, + MatrixMetadata matrixMetadata) { + JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair()); + MatrixCharacteristics matrixCharacteristics; + if (matrixMetadata != null) { + matrixCharacteristics = matrixMetadata.asMatrixCharacteristics(); + } else { + matrixCharacteristics = new MatrixCharacteristics(); + } + JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction()); + + JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); + + FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo)); + JavaPairRDD<Long, FrameBlock> rdd; + try { + rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, false, ",", false, -1); + } catch (DMLRuntimeException e) { + e.printStackTrace(); + return null; + } + frameObject.setRDDHandle(new RDDObject(rdd, 
variableName)); + return frameObject; + } + + /** * Convert a {@code JavaRDD<String>} in IJV format to a {@code MatrixObject} * . Note that metadata is required for IJV format. * @@ -557,6 +703,47 @@ public class MLContextConversionUtil { } /** + * Convert a {@code JavaRDD<String>} in IJV format to a {@code FrameObject} + * . Note that metadata is required for IJV format. + * + * @param variableName + * name of the variable associated with the frame + * @param javaRDD + * the Java RDD of strings + * @param matrixMetadata + * matrix metadata + * @return the {@code JavaRDD<String>} converted to a {@code FrameObject} + */ + public static FrameObject javaRDDStringIJVToFrameObject(String variableName, JavaRDD<String> javaRDD, + MatrixMetadata matrixMetadata) { + JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair()); + MatrixCharacteristics matrixCharacteristics; + if (matrixMetadata != null) { + matrixCharacteristics = matrixMetadata.asMatrixCharacteristics(); + } else { + matrixCharacteristics = new MatrixCharacteristics(); + } + + JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction()); + + JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); + + FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo)); + JavaPairRDD<Long, FrameBlock> rdd; + try { + List<ValueType> lschema = null; + if(lschema == null) + lschema = Collections.nCopies((int)matrixCharacteristics.getCols(), ValueType.STRING); + rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, lschema); + } catch (DMLRuntimeException e) { + e.printStackTrace(); + return null; + } + frameObject.setRDDHandle(new RDDObject(rdd, variableName)); + return frameObject; + } + + /** * Convert a {@code RDD<String>} in CSV format to a {@code 
MatrixObject} * * @param variableName @@ -588,6 +775,37 @@ public class MLContextConversionUtil { } /** + * Convert a {@code RDD<String>} in CSV format to a {@code FrameObject} + * + * @param variableName + * name of the variable associated with the frame + * @param rdd + * the RDD of strings + * @return the {@code RDD<String>} converted to a {@code FrameObject} + */ + public static FrameObject rddStringCSVToFrameObject(String variableName, RDD<String> rdd) { + return rddStringCSVToFrameObject(variableName, rdd, null); + } + + /** + * Convert a {@code RDD<String>} in CSV format to a {@code FrameObject} + * + * @param variableName + * name of the variable associated with the frame + * @param rdd + * the RDD of strings + * @param matrixMetadata + * frame metadata + * @return the {@code RDD<String>} converted to a {@code FrameObject} + */ + public static FrameObject rddStringCSVToFrameObject(String variableName, RDD<String> rdd, + MatrixMetadata matrixMetadata) { + ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class); + JavaRDD<String> javaRDD = JavaRDD.fromRDD(rdd, tag); + return javaRDDStringCSVToFrameObject(variableName, javaRDD, matrixMetadata); + } + + /** * Convert a {@code RDD<String>} in IJV format to a {@code MatrixObject}. * Note that metadata is required for IJV format. * @@ -607,6 +825,25 @@ public class MLContextConversionUtil { } /** + * Convert a {@code RDD<String>} in IJV format to a {@code FrameObject}. + * Note that metadata is required for IJV format. 
+ * + * @param variableName + * name of the variable associated with the frame + * @param rdd + * the RDD of strings + * @param matrixMetadata + * frame metadata + * @return the {@code RDD<String>} converted to a {@code FrameObject} + */ + public static FrameObject rddStringIJVToFrameObject(String variableName, RDD<String> rdd, + MatrixMetadata matrixMetadata) { + ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class); + JavaRDD<String> javaRDD = JavaRDD.fromRDD(rdd, tag); + return javaRDDStringIJVToFrameObject(variableName, javaRDD, matrixMetadata); + } + + /** * Convert an {@code BinaryBlockMatrix} to a {@code JavaRDD<String>} in IVJ * format. * @@ -661,6 +898,22 @@ public class MLContextConversionUtil { } /** + * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in CSV + * format. + * + * @param frameObject + * the {@code FrameObject} + * @return the {@code FrameObject} converted to a {@code JavaRDD<String>} + */ + public static JavaRDD<String> frameObjectToJavaRDDStringCSV(FrameObject frameObject, String delimiter) { + List<String> list = frameObjectToListStringCSV(frameObject, delimiter); + + JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext)MLContextProxy.getActiveMLContext()); + JavaRDD<String> javaRDDStringCSV = jsc.parallelize(list); + return javaRDDStringCSV; + } + + /** * Convert a {@code MatrixObject} to a {@code JavaRDD<String>} in IJV * format. * @@ -680,6 +933,22 @@ public class MLContextConversionUtil { } /** + * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in IJV + * format. 
+ * + * @param frameObject + * the {@code FrameObject} + * @return the {@code FrameObject} converted to a {@code JavaRDD<String>} + */ + public static JavaRDD<String> frameObjectToJavaRDDStringIJV(FrameObject frameObject) { + List<String> list = frameObjectToListStringIJV(frameObject); + + JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext)MLContextProxy.getActiveMLContext()); + JavaRDD<String> javaRDDStringIJV = jsc.parallelize(list); + return javaRDDStringIJV; + } + + /** * Convert a {@code MatrixObject} to a {@code RDD<String>} in IJV format. * * @param matrixObject @@ -708,6 +977,33 @@ public class MLContextConversionUtil { } /** + * Convert a {@code FrameObject} to a {@code RDD<String>} in IJV format. + * + * @param frameObject + * the {@code FrameObject} + * @return the {@code FrameObject} converted to a {@code RDD<String>} + */ + public static RDD<String> frameObjectToRDDStringIJV(FrameObject frameObject) { + + // NOTE: The following works when called from Java but does not + // currently work when called from Spark Shell (when you call + // collect() on the RDD<String>). + // + // JavaRDD<String> javaRDD = jsc.parallelize(list); + // RDD<String> rdd = JavaRDD.toRDD(javaRDD); + // + // Therefore, we call parallelize() on the SparkContext rather than + // the JavaSparkContext to produce the RDD<String> for Scala. + + List<String> list = frameObjectToListStringIJV(frameObject); + + SparkContext sc = MLContextUtil.getSparkContext((MLContext) MLContextProxy.getActiveMLContext()); + ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class); + RDD<String> rddString = sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag); + return rddString; + } + + /** * Convert a {@code MatrixObject} to a {@code RDD<String>} in CSV format. * * @param matrixObject @@ -736,6 +1032,33 @@ public class MLContextConversionUtil { } /** + * Convert a {@code FrameObject} to a {@code RDD<String>} in CSV format. 
+ * + * @param frameObject + * the {@code FrameObject} + * @return the {@code FrameObject} converted to a {@code RDD<String>} + */ + public static RDD<String> frameObjectToRDDStringCSV(FrameObject frameObject, String delimiter) { + + // NOTE: The following works when called from Java but does not + // currently work when called from Spark Shell (when you call + // collect() on the RDD<String>). + // + // JavaRDD<String> javaRDD = jsc.parallelize(list); + // RDD<String> rdd = JavaRDD.toRDD(javaRDD); + // + // Therefore, we call parallelize() on the SparkContext rather than + // the JavaSparkContext to produce the RDD<String> for Scala. + + List<String> list = frameObjectToListStringCSV(frameObject, delimiter); + + SparkContext sc = MLContextUtil.getSparkContext((MLContext) MLContextProxy.getActiveMLContext()); + ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class); + RDD<String> rddString = sc.parallelize(JavaConversions.asScalaBuffer(list), sc.defaultParallelism(), tag); + return rddString; + } + + /** * Convert a {@code MatrixObject} to a {@code List<String>} in CSV format. * * @param matrixObject @@ -799,6 +1122,39 @@ public class MLContextConversionUtil { } /** + * Convert a {@code FrameObject} to a {@code List<String>} in CSV format. 
+ * + * @param frameObject + * the {@code FrameObject} + * @return the {@code FrameObject} converted to a {@code List<String>} + */ + public static List<String> frameObjectToListStringCSV(FrameObject frameObject, String delimiter) { + try { + FrameBlock fb = frameObject.acquireRead(); + + int rows = fb.getNumRows(); + int cols = fb.getNumColumns(); + List<String> list = new ArrayList<String>(); + + for (int i = 0; i < rows; i++) { + StringBuilder sb = new StringBuilder(); + for (int j = 0; j < cols; j++) { + if (j > 0) { + sb.append(delimiter); + } + sb.append(fb.get(i, j)); + } + list.add(sb.toString()); + } + + frameObject.release(); + return list; + } catch (CacheException e) { + throw new MLContextException("Cache exception while converting frame object to List<String> CSV format", e); + } + } + + /** * Convert a {@code MatrixObject} to a {@code List<String>} in IJV format. * * @param matrixObject @@ -853,6 +1209,42 @@ public class MLContextConversionUtil { } /** + * Convert a {@code FrameObject} to a {@code List<String>} in IJV format. + * + * @param frameObject + * the {@code FrameObject} + * @return the {@code FrameObject} converted to a {@code List<String>} + */ + public static List<String> frameObjectToListStringIJV(FrameObject frameObject) { + try { + FrameBlock fb = frameObject.acquireRead(); + + int rows = fb.getNumRows(); + int cols = fb.getNumColumns(); + List<String> list = new ArrayList<String>(); + + StringBuilder sb = null; + for (int i = 0; i < rows; i++) { + sb = new StringBuilder(); + for (int j = 0; j < cols; j++) { + sb = new StringBuilder(); + sb.append(i + 1); + sb.append(" "); + sb.append(j + 1); + sb.append(" "); + sb.append(fb.get(i, j)); + list.add(sb.toString()); + } + } + + frameObject.release(); + return list; + } catch (CacheException e) { + throw new MLContextException("Cache exception while converting frame object to List<String> IJV format", e); + } + } + + /** * Convert a {@code MatrixObject} to a two-dimensional double array. 
* * @param matrixObject @@ -905,6 +1297,31 @@ public class MLContextConversionUtil { } /** + * Convert a {@code FrameObject} to a {@code DataFrame}. + * + * @param frameObject + * the {@code FrameObject} + * @param sparkExecutionContext + * the Spark execution context + * @return the {@code FrameObject} converted to a {@code DataFrame} + */ + public static DataFrame frameObjectToDataFrame(FrameObject frameObject, + SparkExecutionContext sparkExecutionContext) { + try { + @SuppressWarnings("unchecked") + JavaPairRDD<Long, FrameBlock> binaryBlockFrame = (JavaPairRDD<Long, FrameBlock>) sparkExecutionContext + .getRDDHandleForFrameObject(frameObject, InputInfo.BinaryBlockInputInfo); + MatrixCharacteristics matrixCharacteristics = frameObject.getMatrixCharacteristics(); + + JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); + + return FrameRDDConverterUtils.binaryBlockToDataFrame(binaryBlockFrame, matrixCharacteristics, jsc); + } catch (DMLRuntimeException e) { + throw new MLContextException("DMLRuntimeException while converting frame object to DataFrame", e); + } + } + + /** * Convert a {@code MatrixObject} to a {@code BinaryBlockMatrix}. 
* * @param matrixObject @@ -926,5 +1343,5 @@ public class MLContextConversionUtil { throw new MLContextException("DMLRuntimeException while converting matrix object to BinaryBlockMatrix", e); } } - + } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java index 6da6468..9813174 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java @@ -38,6 +38,7 @@ import org.apache.commons.lang3.text.WordUtils; import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.api.java.JavaSparkContext; import org.apache.spark.rdd.RDD; import org.apache.spark.sql.DataFrame; import org.apache.sysml.conf.CompilerConfig; @@ -45,6 +46,7 @@ import org.apache.sysml.conf.CompilerConfig.ConfigType; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.conf.DMLConfig; import org.apache.sysml.parser.ParseException; +import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.LocalVariableMap; import org.apache.sysml.runtime.controlprogram.caching.FrameObject; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; @@ -410,7 +412,7 @@ public final class MLContextUtil { * @return input in SystemML data representation */ public static Data convertInputType(String parameterName, Object parameterValue) { - return convertInputType(parameterName, parameterValue, null); + return convertInputType(parameterName, parameterValue, null, false); } /** @@ -422,9 +424,11 @@ public final class MLContextUtil { * The value of the input parameter * @param matrixMetadata * 
matrix metadata + * @param bFrame + * if input is of type frame * @return input in SystemML data representation */ - public static Data convertInputType(String parameterName, Object parameterValue, MatrixMetadata matrixMetadata) { + public static Data convertInputType(String parameterName, Object parameterValue, MatrixMetadata matrixMetadata, boolean bFrame) { String name = parameterName; Object value = parameterValue; if (name == null) { @@ -434,24 +438,44 @@ public final class MLContextUtil { } else if (value instanceof JavaRDD<?>) { @SuppressWarnings("unchecked") JavaRDD<String> javaRDD = (JavaRDD<String>) value; - MatrixObject matrixObject; - if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { - matrixObject = MLContextConversionUtil.javaRDDStringIJVToMatrixObject(name, javaRDD, matrixMetadata); + if(!bFrame) { + MatrixObject matrixObject; + if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { + matrixObject = MLContextConversionUtil.javaRDDStringIJVToMatrixObject(name, javaRDD, matrixMetadata); + } else { + matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD, matrixMetadata); + } + return matrixObject; } else { - matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD, matrixMetadata); + FrameObject frameObject; + if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { + frameObject = MLContextConversionUtil.javaRDDStringIJVToFrameObject(name, javaRDD, matrixMetadata); + } else { + frameObject = MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD, matrixMetadata); + } + return frameObject; } - return matrixObject; } else if (value instanceof RDD<?>) { @SuppressWarnings("unchecked") RDD<String> rdd = (RDD<String>) value; - MatrixObject matrixObject; - if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { - matrixObject = 
MLContextConversionUtil.rddStringIJVToMatrixObject(name, rdd, matrixMetadata); + if(!bFrame) { + MatrixObject matrixObject; + if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { + matrixObject = MLContextConversionUtil.rddStringIJVToMatrixObject(name, rdd, matrixMetadata); + } else { + matrixObject = MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd, matrixMetadata); + } + return matrixObject; } else { - matrixObject = MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd, matrixMetadata); + FrameObject frameObject; + if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { + frameObject = MLContextConversionUtil.rddStringIJVToFrameObject(name, rdd, matrixMetadata); + } else { + frameObject = MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd, matrixMetadata); + } + return frameObject; } - return matrixObject; } else if (value instanceof MatrixBlock) { MatrixBlock matrixBlock = (MatrixBlock) value; MatrixObject matrixObject = MLContextConversionUtil.matrixBlockToMatrixObject(name, matrixBlock, @@ -459,14 +483,25 @@ public final class MLContextUtil { return matrixObject; } else if (value instanceof FrameBlock) { FrameBlock frameBlock = (FrameBlock) value; - FrameObject frameObject = MLContextConversionUtil.frameBlockToframeObject(name, frameBlock, + FrameObject frameObject = MLContextConversionUtil.frameBlockToFrameObject(name, frameBlock, matrixMetadata); return frameObject; } else if (value instanceof DataFrame) { DataFrame dataFrame = (DataFrame) value; - MatrixObject matrixObject = MLContextConversionUtil - .dataFrameToMatrixObject(name, dataFrame, matrixMetadata); - return matrixObject; + if(!bFrame) { + MatrixObject matrixObject = MLContextConversionUtil + .dataFrameToMatrixObject(name, dataFrame, matrixMetadata); + return matrixObject; + } else { + FrameObject frameObject = null; + try { + frameObject = MLContextConversionUtil + .dataFrameToFrameObject(name, 
dataFrame, matrixMetadata); + } catch (DMLRuntimeException e) { + e.printStackTrace(); + } + return frameObject; + } } else if (value instanceof BinaryBlockMatrix) { BinaryBlockMatrix binaryBlockMatrix = (BinaryBlockMatrix) value; if (matrixMetadata == null) { @@ -855,4 +890,14 @@ public final class MLContextUtil { return sb.toString(); } + public static SparkContext getSparkContext(MLContext mlContext) + { + return mlContext.getSparkContext(); + } + + public static JavaSparkContext getJavaSparkContext(MLContext mlContext) + { + return new JavaSparkContext(mlContext.getSparkContext()); + } + } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java index 605ba95..3841bc8 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/MLResults.java @@ -142,6 +142,21 @@ public class MLResults { } /** + * Obtain an output as a {@code FrameObject} + * + * @param outputName + * the name of the output + * @return the output as a {@code FrameObject} + */ + public FrameObject getFrameObject(String outputName) { + Data data = getData(outputName); + if (!(data instanceof FrameObject)) { + throw new MLContextException("Variable '" + outputName + "' not a frame"); + } + return (FrameObject) data; + } + + /** * Obtain an output as a two-dimensional {@code double} array. * * @param outputName @@ -181,6 +196,19 @@ public class MLResults { } /** + * Obtain an output as a {@code JavaRDD<String>} in IJV format. 
+ * + * @param outputName + * the name of the output + * @return the output as a {@code JavaRDD<String>} in IJV format + */ + public JavaRDD<String> getFrameJavaRDDStringIJV(String outputName) { + FrameObject fo = getFrameObject(outputName); + JavaRDD<String> javaRDDStringIJV = MLContextConversionUtil.frameObjectToJavaRDDStringIJV(fo); + return javaRDDStringIJV; + } + + /** * Obtain an output as a {@code JavaRDD<String>} in CSV format. * <p> * The following matrix in DML: @@ -205,6 +233,19 @@ public class MLResults { } /** + * Obtain an output as a {@code JavaRDD<String>} in CSV format. + * + * @param outputName + * the name of the output + * @return the output as a {@code JavaRDD<String>} in CSV format + */ + public JavaRDD<String> getFrameJavaRDDStringCSV(String outputName, String delimiter) { + FrameObject fo = getFrameObject(outputName); + JavaRDD<String> javaRDDStringCSV = MLContextConversionUtil.frameObjectToJavaRDDStringCSV(fo, delimiter); + return javaRDDStringCSV; + } + + /** * Obtain an output as a {@code RDD<String>} in CSV format. * <p> * The following matrix in DML: @@ -228,6 +269,20 @@ public class MLResults { return rddStringCSV; } + + /** + * Obtain an output as a {@code RDD<String>} in CSV format. + * + * @param outputName + * the name of the output + * @return the output as a {@code RDD<String>} in CSV format + */ + public RDD<String> getFrameRDDStringCSV(String outputName, String delimiter) { + FrameObject fo = getFrameObject(outputName); + RDD<String> rddStringCSV = MLContextConversionUtil.frameObjectToRDDStringCSV(fo, delimiter); + return rddStringCSV; + } + /** * Obtain an output as a {@code RDD<String>} in IJV format. * <p> @@ -255,6 +310,19 @@ public class MLResults { } /** + * Obtain an output as a {@code RDD<String>} in IJV format. 
+ * + * @param outputName + * the name of the output + * @return the output as a {@code RDD<String>} in IJV format + */ + public RDD<String> getFrameRDDStringIJV(String outputName) { + FrameObject fo = getFrameObject(outputName); + RDD<String> rddStringIJV = MLContextConversionUtil.frameObjectToRDDStringIJV(fo); + return rddStringIJV; + } + + /** * Obtain an output as a {@code DataFrame} of doubles with an ID column. * <p> * The following matrix in DML: @@ -412,6 +480,19 @@ public class MLResults { } /** + * Obtain an output as a {@code DataFrame} without an ID column. + * + * @param outputName + * the name of the output + * @return the output as a {@code DataFrame} without an ID column + */ + public DataFrame getFrameDataFrame(String outputName) { + FrameObject mo = getFrameObject(outputName); + DataFrame df = MLContextConversionUtil.frameObjectToDataFrame(mo, sparkExecutionContext); + return df; + } + + /** * Obtain an output as a {@code Matrix}. * * @param outputName http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/main/java/org/apache/sysml/api/mlcontext/Script.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/Script.java b/src/main/java/org/apache/sysml/api/mlcontext/Script.java index 1c2d795..a3ce430 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/Script.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/Script.java @@ -29,6 +29,7 @@ import java.util.Map.Entry; import java.util.Set; import org.apache.sysml.runtime.controlprogram.LocalVariableMap; +import org.apache.sysml.runtime.controlprogram.caching.FrameObject; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; import org.apache.sysml.runtime.instructions.cp.Data; @@ -67,6 +68,10 @@ public class Script { */ private Set<String> inputVariables = new LinkedHashSet<String>(); /** + * The input variable type (whether it is a frame or a matrix). 
+ */ + private Map<String, Boolean> inputVariablesType = new LinkedHashMap<String, Boolean>(); + /** * The input matrix metadata if present. */ private Map<String, MatrixMetadata> inputMatrixMetadata = new LinkedHashMap<String, MatrixMetadata>(); @@ -181,6 +186,15 @@ public class Script { } /** + * Obtain the input variable type flags (whether each input is a frame or not) + * + * @return map from input variable name to its frame flag + */ + public Map<String, Boolean> getInputVariablesType() { + return inputVariablesType; + } + + /** * Obtain the output variable names as an unmodifiable set of strings. * * @return the output variable names @@ -410,6 +424,23 @@ public class Script { * @return {@code this} Script object to allow chaining of methods */ public Script in(String name, Object value, MatrixMetadata matrixMetadata) { + return in(name, value, matrixMetadata, false); + } + /** + * Register an input (parameter ($) or variable) with optional matrix + * metadata. + * + * @param name + * name of the input + * @param value + * value of the input + * @param matrixMetadata + * optional matrix metadata + * @param bFrame + * if input is of type frame + * @return {@code this} Script object to allow chaining of methods + */ + public Script in(String name, Object value, MatrixMetadata matrixMetadata, boolean bFrame) { MLContextUtil.checkInputValueType(name, value); if (inputs == null) { inputs = new LinkedHashMap<String, Object>(); @@ -423,11 +454,15 @@ public class Script { } inputParameters.put(name, value); } else { - Data data = MLContextUtil.convertInputType(name, value, matrixMetadata); + Data data = MLContextUtil.convertInputType(name, value, matrixMetadata, bFrame); if (data != null) { symbolTable.put(name, data); inputVariables.add(name); - if (data instanceof MatrixObject) { + if (inputVariablesType == null) { + inputVariablesType = new LinkedHashMap<String, Boolean>(); + } + inputVariablesType.put(name, new Boolean(bFrame)); + if (data instanceof MatrixObject || data instanceof FrameObject) { if 
(matrixMetadata != null) { inputMatrixMetadata.put(name, matrixMetadata); } @@ -531,6 +566,7 @@ public class Script { inputs.clear(); inputParameters.clear(); inputVariables.clear(); + inputVariablesType.clear(); inputMatrixMetadata.clear(); } @@ -633,7 +669,10 @@ public class Script { } else if (MLContextUtil.isBasicType(inValue)) { sb.append(" = read('', data_type='scalar');\n"); } else { - sb.append(" = read('');\n"); + if(inputVariablesType.get(in).booleanValue()) + sb.append(" = read('', data_type='frame');\n"); + else + sb.append(" = read('');\n"); } } else if (isPYDML()) { if (inValue instanceof String) { @@ -642,7 +681,10 @@ public class Script { } else if (MLContextUtil.isBasicType(inValue)) { sb.append(" = load('', data_type='scalar')\n"); } else { - sb.append(" = load('')\n"); + if(inputVariablesType.get(in).booleanValue()) + sb.append(" = load('', data_type='frame')\n"); + else + sb.append(" = load('')\n"); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java index cd4797c..cf0d09f 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/ScriptExecutor.java @@ -355,7 +355,7 @@ public class ScriptExecutor { if (symbolTable.get(inputVariable) == null) { // retrieve optional metadata if it exists MatrixMetadata mm = inputMatrixMetadata.get(inputVariable); - script.in(inputVariable, inputs.get(inputVariable), mm); + script.in(inputVariable, inputs.get(inputVariable), mm, script.getInputVariablesType().get(inputVariable)); } } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/main/java/org/apache/sysml/runtime/instructions/spark/ReblockSPInstruction.java 
---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/spark/ReblockSPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/spark/ReblockSPInstruction.java index d0128f9..b61bb4f 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/spark/ReblockSPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/spark/ReblockSPInstruction.java @@ -242,6 +242,27 @@ public class ReblockSPInstruction extends UnarySPInstruction sec.setRDDHandleForVariable(output.getName(), out); sec.addLineageRDD(output.getName(), input1.getName()); } + else if(iinfo == InputInfo.CSVInputInfo) { + // HACK ALERT: Until we introduce the rewrite to insert csvrblock for non-persistent read + // throw new DMLRuntimeException("CSVInputInfo is not supported for ReblockSPInstruction"); + CSVReblockSPInstruction csvInstruction = null; + boolean hasHeader = false; + String delim = ","; + boolean fill = false; + double fillValue = 0; + if(fo.getFileFormatProperties() instanceof CSVFileFormatProperties + && fo.getFileFormatProperties() != null ) + { + CSVFileFormatProperties props = (CSVFileFormatProperties) fo.getFileFormatProperties(); + hasHeader = props.hasHeader(); + delim = props.getDelim(); + fill = props.isFill(); + fillValue = props.getFillValue(); + } + + csvInstruction = new CSVReblockSPInstruction(null, input1, output, mcOut.getRowsPerBlock(), mcOut.getColsPerBlock(), hasHeader, delim, fill, fillValue, "csvrblk", instString); + csvInstruction.processInstruction(sec); + } else { throw new DMLRuntimeException("The given InputInfo is not implemented for ReblockSPInstruction:" + iinfo); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java ---------------------------------------------------------------------- diff --git 
a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java index e65dfe7..fd220d9 100644 --- a/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java +++ b/src/test/java/org/apache/sysml/test/integration/mlcontext/MLContextTest.java @@ -38,6 +38,7 @@ import java.io.InputStream; import java.net.MalformedURLException; import java.net.URL; import java.util.ArrayList; +import java.util.Arrays; import java.util.HashMap; import java.util.List; import java.util.Map; @@ -66,7 +67,9 @@ import org.apache.sysml.api.mlcontext.MatrixFormat; import org.apache.sysml.api.mlcontext.MatrixMetadata; import org.apache.sysml.api.mlcontext.Script; import org.apache.sysml.api.mlcontext.ScriptExecutor; +import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; +import org.apache.sysml.runtime.instructions.spark.utils.FrameRDDConverterUtils; import org.apache.sysml.test.integration.AutomatedTestBase; import org.junit.After; import org.junit.AfterClass; @@ -84,6 +87,9 @@ public class MLContextTest extends AutomatedTestBase { protected final static String TEST_DIR = "org/apache/sysml/api/mlcontext"; protected final static String TEST_NAME = "MLContext"; + public static enum SCRIPT_TYPE {DML, PYDML, SCALA}; + public static enum IO_TYPE {ANY, FILE, JAVA_RDD_STR_CSV, JAVA_RDD_STR_IJV, RDD_STR_CSV, RDD_STR_IJV, DATAFRAME}; + private static SparkConf conf; private static JavaSparkContext sc; private static MLContext ml; @@ -2324,6 +2330,322 @@ public class MLContextTest extends AutomatedTestBase { // ml.execute(script); // } + //////////////////////////////////////////// + // SystemML Frame MLContext testset Begin + //////////////////////////////////////////// + @Test + public void testFrameJavaRDD_CSV_DML() { + testFrame(MatrixFormat.CSV, SCRIPT_TYPE.DML, IO_TYPE.JAVA_RDD_STR_CSV, IO_TYPE.ANY); + } + + @Test + 
public void testFrameJavaRDD_CSV_DML_OutJavaRddCSV() { + testFrame(MatrixFormat.CSV, SCRIPT_TYPE.DML, IO_TYPE.JAVA_RDD_STR_CSV, IO_TYPE.JAVA_RDD_STR_CSV); + } + + @Test + public void testFrameJavaRDD_CSV_PYDML() { + testFrame(MatrixFormat.CSV, SCRIPT_TYPE.PYDML, IO_TYPE.JAVA_RDD_STR_CSV, IO_TYPE.ANY); + } + + @Test + public void testFrameRDD_CSV_PYDML() { + testFrame(MatrixFormat.CSV, SCRIPT_TYPE.PYDML, IO_TYPE.RDD_STR_CSV, IO_TYPE.ANY); + } + + @Test + public void testFrameJavaRDD_CSV_PYDML_OutRddIJV() { + testFrame(MatrixFormat.CSV, SCRIPT_TYPE.PYDML, IO_TYPE.JAVA_RDD_STR_CSV, IO_TYPE.RDD_STR_IJV); + } + + @Test + public void testFrameJavaRDD_IJV_DML() { + testFrame(MatrixFormat.IJV, SCRIPT_TYPE.DML, IO_TYPE.JAVA_RDD_STR_IJV, IO_TYPE.ANY); + } + + @Test + public void testFrameRDD_IJV_DML() { + testFrame(MatrixFormat.IJV, SCRIPT_TYPE.DML, IO_TYPE.RDD_STR_IJV, IO_TYPE.ANY); + } + + @Test + public void testFrameJavaRDD_IJV_DML_OutRddCSV() { + testFrame(MatrixFormat.IJV, SCRIPT_TYPE.DML, IO_TYPE.JAVA_RDD_STR_IJV, IO_TYPE.RDD_STR_CSV); + } + + @Test + public void testFrameJavaRDD_IJV_PYDML() { + testFrame(MatrixFormat.IJV, SCRIPT_TYPE.PYDML, IO_TYPE.JAVA_RDD_STR_IJV, IO_TYPE.ANY); + } + + @Test + public void testFrameJavaRDD_IJV_PYDML_OutJavaRddIJV() { + testFrame(MatrixFormat.IJV, SCRIPT_TYPE.PYDML, IO_TYPE.JAVA_RDD_STR_IJV, IO_TYPE.JAVA_RDD_STR_IJV); + } + + @Test + public void testFrameFile_CSV_DML() { + testFrame(MatrixFormat.CSV, SCRIPT_TYPE.DML, IO_TYPE.FILE, IO_TYPE.ANY); + } + + @Test + public void testFrameFile_CSV_PYDML() { + testFrame(MatrixFormat.CSV, SCRIPT_TYPE.PYDML, IO_TYPE.FILE, IO_TYPE.ANY); + } + + @Test + public void testFrameFile_IJV_DML() { + testFrame(MatrixFormat.IJV, SCRIPT_TYPE.DML, IO_TYPE.FILE, IO_TYPE.ANY); + } + + @Test + public void testFrameFile_IJV_PYDML() { + testFrame(MatrixFormat.IJV, SCRIPT_TYPE.PYDML, IO_TYPE.FILE, IO_TYPE.ANY); + } + + @Test + public void testFrameDataFrame_CSV_DML() { + testFrame(MatrixFormat.CSV, 
SCRIPT_TYPE.DML, IO_TYPE.DATAFRAME, IO_TYPE.ANY); + } + + @Test + public void testFrameDataFrame_CSV_PYDML() { + testFrame(MatrixFormat.CSV, SCRIPT_TYPE.PYDML, IO_TYPE.DATAFRAME, IO_TYPE.ANY); + } + + @Test + public void testFrameDataFrameOutDataFrame_CSV_DML() { + testFrame(MatrixFormat.CSV, SCRIPT_TYPE.DML, IO_TYPE.DATAFRAME, IO_TYPE.DATAFRAME); + } + + + + + public void testFrame(MatrixFormat format, SCRIPT_TYPE script_type, IO_TYPE inputType, IO_TYPE outputType) { + + System.out.println("MLContextTest - Frame JavaRDD<String> for format: " + format + " Script: " + script_type); + + List<String> listA = new ArrayList<String>(); + List<String> listB = new ArrayList<String>(); + MatrixMetadata mmA = null, mmB = null; + Script script = null; + + + if(inputType != IO_TYPE.FILE) { + if(format == MatrixFormat.CSV) { + listA.add("1,Str2,3.0,true"); + listA.add("4,Str5,6.0,false"); + listA.add("7,Str8,9.0,true"); + + listB.add("Str12,13.0,true"); + listB.add("Str25,26.0,false"); + + mmA = new MatrixMetadata(MatrixFormat.CSV, 3, 4); + mmB = new MatrixMetadata(MatrixFormat.CSV, 2, 3); + } else if(format == MatrixFormat.IJV) { + listA.add("1 1 1"); + listA.add("1 2 Str2"); + listA.add("1 3 3.0"); + listA.add("1 4 true"); + listA.add("2 1 4"); + listA.add("2 2 Str5"); + listA.add("2 3 6.0"); + listA.add("2 4 false"); + listA.add("3 1 7"); + listA.add("3 2 Str8"); + listA.add("3 3 9.0"); + listA.add("3 4 true"); + + listB.add("1 1 Str12"); + listB.add("1 2 13.0"); + listB.add("1 3 true"); + listB.add("2 1 Str25"); + listB.add("2 2 26.0"); + listB.add("2 3 false"); + + mmA = new MatrixMetadata(MatrixFormat.IJV, 3, 4); + mmB = new MatrixMetadata(MatrixFormat.IJV, 2, 3); + } + JavaRDD<String> javaRDDA = sc.parallelize(listA); + JavaRDD<String> javaRDDB = sc.parallelize(listB); + + if(inputType == IO_TYPE.DATAFRAME) { + JavaRDD<Row> javaRddRowA = javaRDDA.map(new CommaSeparatedValueStringToRow()); + JavaRDD<Row> javaRddRowB = javaRDDB.map(new CommaSeparatedValueStringToRow()); + 
+ ValueType[] schemaA = {ValueType.INT, ValueType.STRING, ValueType.DOUBLE, ValueType.BOOLEAN}; + List<ValueType> lschemaA = Arrays.asList(schemaA); + ValueType[] schemaB = {ValueType.STRING, ValueType.DOUBLE, ValueType.BOOLEAN}; + List<ValueType> lschemaB = Arrays.asList(schemaB); + + //Create DataFrame + SQLContext sqlContext = new SQLContext(sc); + StructType dfSchemaA = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(lschemaA); + DataFrame dataFrameA = sqlContext.createDataFrame(javaRddRowA, dfSchemaA); + StructType dfSchemaB = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(lschemaB); + DataFrame dataFrameB = sqlContext.createDataFrame(javaRddRowB, dfSchemaB); + if (script_type == SCRIPT_TYPE.DML) + script = dml("A[2:3,2:4]=B;C=A[2:3,2:3]").in("A", dataFrameA, mmA, true).in("B", dataFrameB, mmB, true).out("A").out("C"); + else if (script_type == SCRIPT_TYPE.PYDML) + // DO NOT USE ; at the end of any statement, it throws NPE + script = pydml("A[$X:$Y,$X:$Z]=B\nC=A[$X:$Y,$X:$Y]").in("A", dataFrameA, mmA, true).in("B", dataFrameB, mmB, true) + // Value for ROW index gets incremented at script level to adjust index in PyDML, but not for Column Index + .in("$X", 1).in("$Y", 3).in("$Z", 4).out("A").out("C"); + } else { + if(inputType == IO_TYPE.JAVA_RDD_STR_CSV || inputType == IO_TYPE.JAVA_RDD_STR_IJV) { + if (script_type == SCRIPT_TYPE.DML) + script = dml("A[2:3,2:4]=B;C=A[2:3,2:3]").in("A", javaRDDA, mmA, true).in("B", javaRDDB, mmB, true).out("A").out("C"); + else if (script_type == SCRIPT_TYPE.PYDML) + // DO NOT USE ; at the end of any statement, it throws NPE + script = pydml("A[$X:$Y,$X:$Z]=B\nC=A[$X:$Y,$X:$Y]").in("A", javaRDDA, mmA, true).in("B", javaRDDB, mmB, true) + // Value for ROW index gets incremented at script level to adjust index in PyDML, but not for Column Index + .in("$X", 1).in("$Y", 3).in("$Z", 4).out("A").out("C"); + } else if(inputType == IO_TYPE.RDD_STR_CSV || inputType == IO_TYPE.RDD_STR_IJV) { + RDD<String> rddA = 
JavaRDD.toRDD(javaRDDA); + RDD<String> rddB = JavaRDD.toRDD(javaRDDB); + + if (script_type == SCRIPT_TYPE.DML) + script = dml("A[2:3,2:4]=B;C=A[2:3,2:3]").in("A", rddA, mmA, true).in("B", rddB, mmB, true).out("A").out("C"); + else if (script_type == SCRIPT_TYPE.PYDML) + // DO NOT USE ; at the end of any statement, it throws NPE + script = pydml("A[$X:$Y,$X:$Z]=B\nC=A[$X:$Y,$X:$Y]").in("A", rddA, mmA, true).in("B", rddB, mmB, true) + // Value for ROW index gets incremented at script level to adjust index in PyDML, but not for Column Index + .in("$X", 1).in("$Y", 3).in("$Z", 4).out("A").out("C"); + } + + } + + } else { // Input type is file + String fileA = null, fileB = null; + if(format == MatrixFormat.CSV) { + fileA = baseDirectory + File.separator + "FrameA.csv"; + fileB = baseDirectory + File.separator + "FrameB.csv"; + } else if(format == MatrixFormat.IJV) { + fileA = baseDirectory + File.separator + "FrameA.ijv"; + fileB = baseDirectory + File.separator + "FrameB.ijv"; + } + + if (script_type == SCRIPT_TYPE.DML) + script = dml("A=read($A); B=read($B);A[2:3,2:4]=B;C=A[2:3,2:3]").in("$A", fileA, mmA, true).in("$B", fileB, mmB, true).out("A").out("C"); + else if (script_type == SCRIPT_TYPE.PYDML) + // DO NOT USE ; at the end of any statement, it throws NPE + script = pydml("A=load($A)\nB=load($B)\nA[$X:$Y,$X:$Z]=B\nC=A[$X:$Y,$X:$Y]").in("$A", fileA).in("$B", fileB) + // Value for ROW index gets incremented at script level to adjust index in PyDML, but not for Column Index + .in("$X", 1).in("$Y", 3).in("$Z", 4).out("A").out("C"); + } + + MLResults mlResults = ml.execute(script); + + if(outputType == IO_TYPE.JAVA_RDD_STR_CSV) { + + JavaRDD<String> javaRDDStringCSVA = mlResults.getFrameJavaRDDStringCSV("A", ","); + List<String> linesA = javaRDDStringCSVA.collect(); + Assert.assertEquals("1,Str2,3.0,true", linesA.get(0)); + Assert.assertEquals("4,Str12,13.0,true", linesA.get(1)); + Assert.assertEquals("7,Str25,26.0,false", linesA.get(2)); + + JavaRDD<String> 
javaRDDStringCSVC = mlResults.getFrameJavaRDDStringCSV("C", ","); + List<String> linesC = javaRDDStringCSVC.collect(); + Assert.assertEquals("Str12,13.0", linesC.get(0)); + Assert.assertEquals("Str25,26.0", linesC.get(1)); + } else if(outputType == IO_TYPE.JAVA_RDD_STR_IJV) { + JavaRDD<String> javaRDDStringIJVA = mlResults.getFrameJavaRDDStringIJV("A"); + List<String> linesA = javaRDDStringIJVA.collect(); + Assert.assertEquals("1 1 1", linesA.get(0)); + Assert.assertEquals("1 2 Str2", linesA.get(1)); + Assert.assertEquals("1 3 3.0", linesA.get(2)); + Assert.assertEquals("1 4 true", linesA.get(3)); + Assert.assertEquals("2 1 4", linesA.get(4)); + Assert.assertEquals("2 2 Str12", linesA.get(5)); + Assert.assertEquals("2 3 13.0", linesA.get(6)); + Assert.assertEquals("2 4 true", linesA.get(7)); + + JavaRDD<String> javaRDDStringIJVC = mlResults.getFrameJavaRDDStringIJV("C"); + List<String> linesC = javaRDDStringIJVC.collect(); + Assert.assertEquals("1 1 Str12", linesC.get(0)); + Assert.assertEquals("1 2 13.0", linesC.get(1)); + Assert.assertEquals("2 1 Str25", linesC.get(2)); + Assert.assertEquals("2 2 26.0", linesC.get(3)); + } else if(outputType == IO_TYPE.RDD_STR_CSV) { + RDD<String> rddStringCSVA = mlResults.getFrameRDDStringCSV("A", ","); //TODO fix delimiter + Iterator<String> iteratorA = rddStringCSVA.toLocalIterator(); + Assert.assertEquals("1,Str2,3.0,true", iteratorA.next()); + Assert.assertEquals("4,Str12,13.0,true", iteratorA.next()); + Assert.assertEquals("7,Str25,26.0,false", iteratorA.next()); + + RDD<String> rddStringCSVC = mlResults.getFrameRDDStringCSV("C", ","); //TODO fix delimiter + Iterator<String> iteratorC = rddStringCSVC.toLocalIterator(); + Assert.assertEquals("Str12,13.0", iteratorC.next()); + Assert.assertEquals("Str25,26.0", iteratorC.next()); + } else if(outputType == IO_TYPE.RDD_STR_IJV) { + RDD<String> rddStringIJVA = mlResults.getFrameRDDStringIJV("A"); + Iterator<String> iteratorA = rddStringIJVA.toLocalIterator(); + 
Assert.assertEquals("1 1 1", iteratorA.next()); + Assert.assertEquals("1 2 Str2", iteratorA.next()); + Assert.assertEquals("1 3 3.0", iteratorA.next()); + Assert.assertEquals("1 4 true", iteratorA.next()); + Assert.assertEquals("2 1 4", iteratorA.next()); + Assert.assertEquals("2 2 Str12", iteratorA.next()); + Assert.assertEquals("2 3 13.0", iteratorA.next()); + Assert.assertEquals("2 4 true", iteratorA.next()); + Assert.assertEquals("3 1 7", iteratorA.next()); + Assert.assertEquals("3 2 Str25", iteratorA.next()); + Assert.assertEquals("3 3 26.0", iteratorA.next()); + Assert.assertEquals("3 4 false", iteratorA.next()); + + RDD<String> rddStringIJVC = mlResults.getFrameRDDStringIJV("C"); + Iterator<String> iteratorC = rddStringIJVC.toLocalIterator(); + Assert.assertEquals("1 1 Str12", iteratorC.next()); + Assert.assertEquals("1 2 13.0", iteratorC.next()); + Assert.assertEquals("2 1 Str25", iteratorC.next()); + Assert.assertEquals("2 2 26.0", iteratorC.next()); + + } else if(outputType == IO_TYPE.DATAFRAME) { + + DataFrame dataFrameA = mlResults.getFrameDataFrame("A"); + List<Row> listAOut = dataFrameA.collectAsList(); + + Row row1 = listAOut.get(0); + Assert.assertEquals("Mistmatch with expected value", "1", row1.getString(0)); + Assert.assertEquals("Mistmatch with expected value", "Str2", row1.getString(1)); + Assert.assertEquals("Mistmatch with expected value", "3.0", row1.getString(2)); + Assert.assertEquals("Mistmatch with expected value", "true", row1.getString(3)); + + Row row2 = listAOut.get(1); + Assert.assertEquals("Mistmatch with expected value", "4", row2.getString(0)); + Assert.assertEquals("Mistmatch with expected value", "Str12", row2.getString(1)); + Assert.assertEquals("Mistmatch with expected value", "13.0", row2.getString(2)); + Assert.assertEquals("Mistmatch with expected value", "true", row2.getString(3)); + + DataFrame dataFrameC = mlResults.getFrameDataFrame("C"); + List<Row> listCOut = dataFrameC.collectAsList(); + + Row row3 = 
listCOut.get(0); + Assert.assertEquals("Mistmatch with expected value", "Str12", row3.getString(0)); + Assert.assertEquals("Mistmatch with expected value", "13.0", row3.getString(1)); + + Row row4 = listCOut.get(1); + Assert.assertEquals("Mistmatch with expected value", "Str25", row4.getString(0)); + Assert.assertEquals("Mistmatch with expected value", "26.0", row4.getString(1)); + } else { + String[][] frameA = mlResults.getFrame("A"); + Assert.assertEquals("Str2", frameA[0][1]); + Assert.assertEquals("3.0", frameA[0][2]); + Assert.assertEquals("13.0", frameA[1][2]); + Assert.assertEquals("true", frameA[1][3]); + Assert.assertEquals("Str25", frameA[2][1]); + + String[][] frameC = mlResults.getFrame("C"); + Assert.assertEquals("Str12", frameC[0][0]); + Assert.assertEquals("Str25", frameC[1][0]); + Assert.assertEquals("13.0", frameC[0][1]); + Assert.assertEquals("26.0", frameC[1][1]); + } + } + //////////////////////////////////////////// + // SystemML Frame MLContext testset End + //////////////////////////////////////////// + @After public void tearDown() { super.tearDown(); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.csv ---------------------------------------------------------------------- diff --git a/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.csv b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.csv new file mode 100644 index 0000000..495538d --- /dev/null +++ b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.csv @@ -0,0 +1,3 @@ +1,Str2,3.0,true +4,Str5,6.0,false +7,Str8,9.0,true \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.csv.mtd ---------------------------------------------------------------------- diff --git a/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.csv.mtd b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.csv.mtd new 
file mode 100644 index 0000000..907e192 --- /dev/null +++ b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.csv.mtd @@ -0,0 +1,13 @@ +{ + "data_type": "frame", + "value_type": "string", + "rows": 3, + "cols": 4, + "nnz": -1, + "format": "csv", + "header": false, + "sep": ",", + "description": { + "author": "SystemML" + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.ijv ---------------------------------------------------------------------- diff --git a/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.ijv b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.ijv new file mode 100644 index 0000000..21f2567 --- /dev/null +++ b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.ijv @@ -0,0 +1,12 @@ +1 1 1 +1 2 Str2 +1 3 3.0 +1 4 true +2 1 4 +2 2 Str5 +2 3 6.0 +2 4 false +3 1 7 +3 2 Str8 +3 3 9.0 +3 4 true http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.ijv.mtd ---------------------------------------------------------------------- diff --git a/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.ijv.mtd b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.ijv.mtd new file mode 100644 index 0000000..e977408 --- /dev/null +++ b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameA.ijv.mtd @@ -0,0 +1,13 @@ +{ + "data_type": "frame", + "value_type": "string", + "rows": 3, + "cols": 4, + "nnz": -1, + "format": "text", + "header": false, + "sep": ",", + "description": { + "author": "SystemML" + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.csv ---------------------------------------------------------------------- diff --git a/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.csv b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.csv new 
file mode 100644 index 0000000..1244f17 --- /dev/null +++ b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.csv @@ -0,0 +1,2 @@ +Str12,13.0,true +Str25,26.0,false http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.csv.mtd ---------------------------------------------------------------------- diff --git a/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.csv.mtd b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.csv.mtd new file mode 100644 index 0000000..729d5db --- /dev/null +++ b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.csv.mtd @@ -0,0 +1,13 @@ +{ + "data_type": "frame", + "value_type": "string", + "rows": 2, + "cols": 3, + "nnz": -1, + "format": "csv", + "header": false, + "sep": ",", + "description": { + "author": "SystemML" + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.ijv ---------------------------------------------------------------------- diff --git a/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.ijv b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.ijv new file mode 100644 index 0000000..02e7415 --- /dev/null +++ b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.ijv @@ -0,0 +1,6 @@ +1 1 Str12 +1 2 13.0 +1 3 true +2 1 Str25 +2 2 26.0 +2 3 false http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/2f9769e2/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.ijv.mtd ---------------------------------------------------------------------- diff --git a/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.ijv.mtd b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.ijv.mtd new file mode 100644 index 0000000..d8dc5f2 --- /dev/null +++ b/src/test/scripts/org/apache/sysml/api/mlcontext/FrameB.ijv.mtd @@ -0,0 +1,13 @@ +{ + "data_type": "frame", + "value_type": "string", + "rows": 2, + "cols": 3, + "nnz": 
-1, + "format": "text", + "header": false, + "sep": ",", + "description": { + "author": "SystemML" + } +} \ No newline at end of file
