[SYSTEMML-896] Additional MLContext Frame support
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/d39865e9 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/d39865e9 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/d39865e9 Branch: refs/heads/master Commit: d39865e9e3468cee0fab95cb9d8efe1ba4fe992f Parents: b7657db Author: Deron Eriksson <[email protected]> Authored: Sat Sep 3 23:10:40 2016 -0700 Committer: Deron Eriksson <[email protected]> Committed: Sat Sep 3 23:10:40 2016 -0700 ---------------------------------------------------------------------- .../sysml/api/mlcontext/BinaryBlockFrame.java | 179 +++++ .../sysml/api/mlcontext/BinaryBlockMatrix.java | 4 +- .../org/apache/sysml/api/mlcontext/Frame.java | 138 ++++ .../apache/sysml/api/mlcontext/FrameFormat.java | 42 ++ .../sysml/api/mlcontext/FrameMetadata.java | 695 +++++++++++++++++++ .../apache/sysml/api/mlcontext/FrameSchema.java | 128 ++++ .../api/mlcontext/MLContextConversionUtil.java | 250 ++++--- .../sysml/api/mlcontext/MLContextUtil.java | 306 +++++--- .../apache/sysml/api/mlcontext/MLResults.java | 247 ++++--- .../sysml/api/mlcontext/MatrixMetadata.java | 2 +- .../apache/sysml/api/mlcontext/Metadata.java | 30 + .../org/apache/sysml/api/mlcontext/Script.java | 100 +-- .../sysml/api/mlcontext/ScriptExecutor.java | 10 +- .../mlcontext/MLContextFrameTest.java | 557 +++++++++++++++ .../integration/mlcontext/MLContextTest.java | 359 +--------- .../integration/mlcontext/ZPackageSuite.java | 3 +- 16 files changed, 2319 insertions(+), 731 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java 
b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java new file mode 100644 index 0000000..88b1b38 --- /dev/null +++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockFrame.java @@ -0,0 +1,179 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.api.mlcontext; + +import org.apache.spark.api.java.JavaPairRDD; +import org.apache.spark.sql.DataFrame; +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext; +import org.apache.sysml.runtime.matrix.MatrixCharacteristics; +import org.apache.sysml.runtime.matrix.data.FrameBlock; + +/** + * BinaryBlockFrame stores data as a SystemML binary-block frame representation. + * + */ +public class BinaryBlockFrame { + + JavaPairRDD<Long, FrameBlock> binaryBlocks; + FrameMetadata frameMetadata; + + /** + * Convert a Spark DataFrame to a SystemML binary-block representation. 
+ * + * @param dataFrame + * the Spark DataFrame + * @param frameMetadata + * frame metadata, such as number of rows and columns + */ + public BinaryBlockFrame(DataFrame dataFrame, FrameMetadata frameMetadata) { + this.frameMetadata = frameMetadata; + binaryBlocks = MLContextConversionUtil.dataFrameToFrameBinaryBlocks(dataFrame, frameMetadata); + } + + /** + * Convert a Spark DataFrame to a SystemML binary-block representation, + * specifying the number of rows and columns. + * + * @param dataFrame + * the Spark DataFrame + * @param numRows + * the number of rows + * @param numCols + * the number of columns + */ + public BinaryBlockFrame(DataFrame dataFrame, long numRows, long numCols) { + this(dataFrame, new FrameMetadata(numRows, numCols, MLContextUtil.defaultBlockSize(), + MLContextUtil.defaultBlockSize())); + } + + /** + * Convert a Spark DataFrame to a SystemML binary-block representation. + * + * @param dataFrame + * the Spark DataFrame + */ + public BinaryBlockFrame(DataFrame dataFrame) { + this(dataFrame, new FrameMetadata()); + } + + /** + * Create a BinaryBlockFrame, specifying the SystemML binary-block frame and + * its metadata. + * + * @param binaryBlocks + * the {@code JavaPairRDD<Long, FrameBlock>} frame + * @param matrixCharacteristics + * the frame metadata as {@code MatrixCharacteristics} + */ + public BinaryBlockFrame(JavaPairRDD<Long, FrameBlock> binaryBlocks, MatrixCharacteristics matrixCharacteristics) { + this.binaryBlocks = binaryBlocks; + this.frameMetadata = new FrameMetadata(matrixCharacteristics); + } + + /** + * Create a BinaryBlockFrame, specifying the SystemML binary-block frame and + * its metadata. 
+ * + * @param binaryBlocks + * the {@code JavaPairRDD<Long, FrameBlock>} frame + * @param frameMetadata + * the frame metadata as {@code FrameMetadata} + */ + public BinaryBlockFrame(JavaPairRDD<Long, FrameBlock> binaryBlocks, FrameMetadata frameMetadata) { + this.binaryBlocks = binaryBlocks; + this.frameMetadata = frameMetadata; + } + + /** + * Obtain a SystemML binary-block frame as a + * {@code JavaPairRDD<Long, FrameBlock>} + * + * @return the SystemML binary-block frame + */ + public JavaPairRDD<Long, FrameBlock> getBinaryBlocks() { + return binaryBlocks; + } + + /** + * Obtain a SystemML binary-block frame as a {@code FrameBlock} + * + * @return the SystemML binary-block frame as a {@code FrameBlock} + */ + public FrameBlock getFrameBlock() { + try { + MatrixCharacteristics mc = getMatrixCharacteristics(); + FrameSchema frameSchema = frameMetadata.getFrameSchema(); + FrameBlock mb = SparkExecutionContext.toFrameBlock(binaryBlocks, frameSchema.getSchema(), + (int) mc.getRows(), (int) mc.getCols()); + return mb; + } catch (DMLRuntimeException e) { + throw new MLContextException("Exception while getting FrameBlock from binary-block frame", e); + } + } + + /** + * Obtain the SystemML binary-block frame characteristics + * + * @return the frame metadata as {@code MatrixCharacteristics} + */ + public MatrixCharacteristics getMatrixCharacteristics() { + return frameMetadata.asMatrixCharacteristics(); + } + + /** + * Obtain the SystemML binary-block frame metadata + * + * @return the frame metadata as {@code FrameMetadata} + */ + public FrameMetadata getFrameMetadata() { + return frameMetadata; + } + + /** + * Set the SystemML binary-block frame metadata + * + * @param frameMetadata + * the frame metadata + */ + public void setFrameMetadata(FrameMetadata frameMetadata) { + this.frameMetadata = frameMetadata; + } + + /** + * Set the SystemML binary-block frame as a + * {@code JavaPairRDD<Long, FrameBlock>} + * + * @param binaryBlocks + * the SystemML binary-block 
frame + */ + public void setBinaryBlocks(JavaPairRDD<Long, FrameBlock> binaryBlocks) { + this.binaryBlocks = binaryBlocks; + } + + @Override + public String toString() { + if (frameMetadata != null) { + return frameMetadata.toString(); + } else { + return super.toString(); + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java index b13669d..ffa8a11 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/BinaryBlockMatrix.java @@ -28,7 +28,7 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.data.MatrixIndexes; /** - * BinaryBlockMatrix stores data as a SystemML binary-block representation. + * BinaryBlockMatrix stores data as a SystemML binary-block matrix representation. 
* */ public class BinaryBlockMatrix { @@ -46,7 +46,7 @@ public class BinaryBlockMatrix { */ public BinaryBlockMatrix(DataFrame dataFrame, MatrixMetadata matrixMetadata) { this.matrixMetadata = matrixMetadata; - binaryBlocks = MLContextConversionUtil.dataFrameToBinaryBlocks(dataFrame, matrixMetadata); + binaryBlocks = MLContextConversionUtil.dataFrameToMatrixBinaryBlocks(dataFrame, matrixMetadata); } /** http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/Frame.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/Frame.java b/src/main/java/org/apache/sysml/api/mlcontext/Frame.java new file mode 100644 index 0000000..ee447df --- /dev/null +++ b/src/main/java/org/apache/sysml/api/mlcontext/Frame.java @@ -0,0 +1,138 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysml.api.mlcontext; + +import org.apache.spark.api.java.JavaRDD; +import org.apache.spark.rdd.RDD; +import org.apache.spark.sql.DataFrame; +import org.apache.sysml.runtime.controlprogram.caching.FrameObject; +import org.apache.sysml.runtime.controlprogram.context.SparkExecutionContext; +import org.apache.sysml.runtime.matrix.MatrixCharacteristics; + +/** + * Frame encapsulates a SystemML frame. + * + */ +public class Frame { + + private FrameObject frameObject; + private SparkExecutionContext sparkExecutionContext; + + public Frame(FrameObject frameObject, SparkExecutionContext sparkExecutionContext) { + this.frameObject = frameObject; + this.sparkExecutionContext = sparkExecutionContext; + } + + /** + * Obtain the frame as a SystemML FrameObject. + * + * @return the frame as a SystemML FrameObject + */ + public FrameObject asFrameObject() { + return frameObject; + } + + /** + * Obtain the frame as a two-dimensional String array + * + * @return the frame as a two-dimensional String array + */ + public String[][] as2DStringArray() { + String[][] strArray = MLContextConversionUtil.frameObjectTo2DStringArray(frameObject); + return strArray; + } + + /** + * Obtain the frame as a {@code JavaRDD<String>} in IJV format + * + * @return the frame as a {@code JavaRDD<String>} in IJV format + */ + public JavaRDD<String> asJavaRDDStringIJV() { + JavaRDD<String> javaRDDStringIJV = MLContextConversionUtil.frameObjectToJavaRDDStringIJV(frameObject); + return javaRDDStringIJV; + } + + /** + * Obtain the frame as a {@code JavaRDD<String>} in CSV format + * + * @return the frame as a {@code JavaRDD<String>} in CSV format + */ + public JavaRDD<String> asJavaRDDStringCSV() { + JavaRDD<String> javaRDDStringCSV = MLContextConversionUtil.frameObjectToJavaRDDStringCSV(frameObject, ","); + return javaRDDStringCSV; + } + + /** + * Obtain the frame as a {@code RDD<String>} in CSV format + * + * @return the frame as a {@code RDD<String>} in CSV format + */ + public 
RDD<String> asRDDStringCSV() { + RDD<String> rddStringCSV = MLContextConversionUtil.frameObjectToRDDStringCSV(frameObject, ","); + return rddStringCSV; + } + + /** + * Obtain the frame as a {@code RDD<String>} in IJV format + * + * @return the frame as a {@code RDD<String>} in IJV format + */ + public RDD<String> asRDDStringIJV() { + RDD<String> rddStringIJV = MLContextConversionUtil.frameObjectToRDDStringIJV(frameObject); + return rddStringIJV; + } + + /** + * Obtain the frame as a {@code DataFrame} + * + * @return the frame as a {@code DataFrame} + */ + public DataFrame asDataFrame() { + DataFrame df = MLContextConversionUtil.frameObjectToDataFrame(frameObject, sparkExecutionContext); + return df; + } + + /** + * Obtain the frame as a {@code BinaryBlockFrame} + * + * @return the frame as a {@code BinaryBlockFrame} + */ + public BinaryBlockFrame asBinaryBlockFrame() { + BinaryBlockFrame binaryBlockFrame = MLContextConversionUtil.frameObjectToBinaryBlockFrame(frameObject, + sparkExecutionContext); + return binaryBlockFrame; + } + + /** + * Obtain the frame metadata + * + * @return the frame metadata + */ + public FrameMetadata getFrameMetadata() { + MatrixCharacteristics matrixCharacteristics = frameObject.getMatrixCharacteristics(); + FrameMetadata frameMetadata = new FrameMetadata(matrixCharacteristics); + return frameMetadata; + } + + @Override + public String toString() { + return frameObject.toString(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/FrameFormat.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/FrameFormat.java b/src/main/java/org/apache/sysml/api/mlcontext/FrameFormat.java new file mode 100644 index 0000000..bce8e5d --- /dev/null +++ b/src/main/java/org/apache/sysml/api/mlcontext/FrameFormat.java @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * 
or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.api.mlcontext; + +/** + * FrameFormat represents the different frame formats supported by the MLContext + * API. + * + */ +public enum FrameFormat { + /** + * Comma-separated value format (dense). + */ + CSV, + + /** + * (I J V) format (sparse). I and J represent frame coordinates and V + * represents the value. The I J and V values are space-separated. + */ + IJV; + + public boolean hasIDColumn() { + return false; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java b/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java new file mode 100644 index 0000000..5aabd80 --- /dev/null +++ b/src/main/java/org/apache/sysml/api/mlcontext/FrameMetadata.java @@ -0,0 +1,695 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.api.mlcontext; + +import org.apache.sysml.runtime.matrix.MatrixCharacteristics; + +/** + * Frame metadata, such as the number of rows, the number of columns, the number + * of non-zero values, the number of rows per block, and the number of columns + * per block in the frame. + * + */ +public class FrameMetadata extends Metadata { + + private Long numRows = null; + private Long numColumns = null; + private Long numNonZeros = null; + private Integer numRowsPerBlock = null; + private Integer numColumnsPerBlock = null; + private FrameFormat frameFormat; + private FrameSchema frameSchema; + + public FrameMetadata() { + } + + /** + * Constructor to create a FrameMetadata object based on a string + * representation of a frame schema. + * + * @param schema + * String representation of the frame schema. + */ + public FrameMetadata(String schema) { + this.frameSchema = new FrameSchema(schema); + } + + /** + * Constructor to create a FrameMetadata object based on frame format. + * + * @param frameFormat + * The frame format. + */ + public FrameMetadata(FrameFormat frameFormat) { + this.frameFormat = frameFormat; + } + + /** + * Constructor to create a FrameMetadata object based on frame schema. + * + * @param frameSchema + * The frame schema. 
+ */ + public FrameMetadata(FrameSchema frameSchema) { + this.frameSchema = frameSchema; + } + + /** + * Constructor to create a FrameMetadata object based on frame format and + * frame schema. + * + * @param frameFormat + * The frame format. + * @param frameSchema + * The frame schema. + */ + public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema) { + this.frameFormat = frameFormat; + this.frameSchema = frameSchema; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, frame + * schema, the number of rows, and the number of columns in a frame. + * + * @param frameFormat + * The frame format. + * @param frameSchema + * The frame schema. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + */ + public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema, Long numRows, Long numColumns) { + this.frameFormat = frameFormat; + this.frameSchema = frameSchema; + this.numRows = numRows; + this.numColumns = numColumns; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, frame + * schema, the number of rows, and the number of columns in a frame. + * + * @param frameFormat + * The frame format. + * @param frameSchema + * The frame schema. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + */ + public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema, int numRows, int numColumns) { + this.frameFormat = frameFormat; + this.frameSchema = frameSchema; + this.numRows = (long) numRows; + this.numColumns = (long) numColumns; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, frame + * schema, the number of rows, the number of columns, the number of non-zero + * values, the number of rows per block, and the number of columns per block + * in a frame. + * + * @param frameFormat + * The frame format. 
+ * @param frameSchema + * The frame schema. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. + * @param numRowsPerBlock + * The number of rows per block in the frame. + * @param numColumnsPerBlock + * The number of columns per block in the frame. + */ + public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema, Long numRows, Long numColumns, + Long numNonZeros, Integer numRowsPerBlock, Integer numColumnsPerBlock) { + this.frameFormat = frameFormat; + this.frameSchema = frameSchema; + this.numRows = numRows; + this.numColumns = numColumns; + this.numNonZeros = numNonZeros; + this.numRowsPerBlock = numRowsPerBlock; + this.numColumnsPerBlock = numColumnsPerBlock; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, frame + * schema, the number of rows, the number of columns, the number of non-zero + * values, the number of rows per block, and the number of columns per block + * in a frame. + * + * @param frameFormat + * The frame format. + * @param frameSchema + * The frame schema. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. + * @param numRowsPerBlock + * The number of rows per block in the frame. + * @param numColumnsPerBlock + * The number of columns per block in the frame. 
+ */ + public FrameMetadata(FrameFormat frameFormat, FrameSchema frameSchema, int numRows, int numColumns, int numNonZeros, + int numRowsPerBlock, int numColumnsPerBlock) { + this.frameFormat = frameFormat; + this.frameSchema = frameSchema; + this.numRows = (long) numRows; + this.numColumns = (long) numColumns; + this.numNonZeros = (long) numNonZeros; + this.numRowsPerBlock = numRowsPerBlock; + this.numColumnsPerBlock = numColumnsPerBlock; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, the + * number of rows, and the number of columns in a frame. + * + * @param frameFormat + * The frame format. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + */ + public FrameMetadata(FrameFormat frameFormat, Long numRows, Long numColumns) { + this.frameFormat = frameFormat; + this.numRows = numRows; + this.numColumns = numColumns; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, the + * number of rows, and the number of columns in a frame. + * + * @param frameFormat + * The frame format. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + */ + public FrameMetadata(FrameFormat frameFormat, int numRows, int numColumns) { + this.frameFormat = frameFormat; + this.numRows = (long) numRows; + this.numColumns = (long) numColumns; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, the + * number of rows, the number of columns, and the number of non-zero values + * in a frame. + * + * @param frameFormat + * The frame format. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. 
+ */ + public FrameMetadata(FrameFormat frameFormat, Long numRows, Long numColumns, Long numNonZeros) { + this.frameFormat = frameFormat; + this.numRows = numRows; + this.numColumns = numColumns; + this.numNonZeros = numNonZeros; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, the + * number of rows, the number of columns, and the number of non-zero values + * in a frame. + * + * @param frameFormat + * The frame format. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. + */ + public FrameMetadata(FrameFormat frameFormat, int numRows, int numColumns, int numNonZeros) { + this.frameFormat = frameFormat; + this.numRows = (long) numRows; + this.numColumns = (long) numColumns; + this.numNonZeros = (long) numNonZeros; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, the + * number of rows, the number of columns, the number of non-zero values, the + * number of rows per block, and the number of columns per block in a frame. + * + * @param frameFormat + * The frame format. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. + * @param numRowsPerBlock + * The number of rows per block in the frame. + * @param numColumnsPerBlock + * The number of columns per block in the frame. 
+ */ + public FrameMetadata(FrameFormat frameFormat, Long numRows, Long numColumns, Long numNonZeros, + Integer numRowsPerBlock, Integer numColumnsPerBlock) { + this.frameFormat = frameFormat; + this.numRows = numRows; + this.numColumns = numColumns; + this.numNonZeros = numNonZeros; + this.numRowsPerBlock = numRowsPerBlock; + this.numColumnsPerBlock = numColumnsPerBlock; + } + + /** + * Constructor to create a FrameMetadata object based on frame format, the + * number of rows, the number of columns, the number of non-zero values, the + * number of rows per block, and the number of columns per block in a frame. + * + * @param frameFormat + * The frame format. + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. + * @param numRowsPerBlock + * The number of rows per block in the frame. + * @param numColumnsPerBlock + * The number of columns per block in the frame. + */ + public FrameMetadata(FrameFormat frameFormat, int numRows, int numColumns, int numNonZeros, int numRowsPerBlock, + int numColumnsPerBlock) { + this.frameFormat = frameFormat; + this.numRows = (long) numRows; + this.numColumns = (long) numColumns; + this.numNonZeros = (long) numNonZeros; + this.numRowsPerBlock = numRowsPerBlock; + this.numColumnsPerBlock = numColumnsPerBlock; + } + + /** + * Constructor to create a FrameMetadata object based on the number of rows + * and the number of columns in a frame. + * + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + */ + public FrameMetadata(Long numRows, Long numColumns) { + this.numRows = numRows; + this.numColumns = numColumns; + } + + /** + * Constructor to create a FrameMetadata object based on the number of rows + * and the number of columns in a frame. + * + * @param numRows + * The number of rows in the frame. 
+ * @param numColumns + * The number of columns in the frame. + */ + public FrameMetadata(int numRows, int numColumns) { + this.numRows = (long) numRows; + this.numColumns = (long) numColumns; + } + + /** + * Constructor to create a FrameMetadata object based on the number of rows, + * the number of columns, and the number of non-zero values in a frame. + * + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. + */ + public FrameMetadata(Long numRows, Long numColumns, Long numNonZeros) { + this.numRows = numRows; + this.numColumns = numColumns; + this.numNonZeros = numNonZeros; + } + + /** + * Constructor to create a FrameMetadata object based on the number of rows, + * the number of columns, and the number of non-zero values in a frame. + * + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. + */ + public FrameMetadata(int numRows, int numColumns, int numNonZeros) { + this.numRows = (long) numRows; + this.numColumns = (long) numColumns; + this.numNonZeros = (long) numNonZeros; + } + + /** + * Constructor to create a FrameMetadata object based on the number of rows, + * the number of columns, the number of rows per block, and the number of + * columns per block in a frame. + * + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numRowsPerBlock + * The number of rows per block in the frame. + * @param numColumnsPerBlock + * The number of columns per block in the frame. 
+ */ + public FrameMetadata(Long numRows, Long numColumns, Integer numRowsPerBlock, Integer numColumnsPerBlock) { + this.numRows = numRows; + this.numColumns = numColumns; + this.numRowsPerBlock = numRowsPerBlock; + this.numColumnsPerBlock = numColumnsPerBlock; + } + + /** + * Constructor to create a FrameMetadata object based on the number of rows, + * the number of columns, the number of rows per block, and the number of + * columns per block in a frame. + * + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numRowsPerBlock + * The number of rows per block in the frame. + * @param numColumnsPerBlock + * The number of columns per block in the frame. + */ + public FrameMetadata(int numRows, int numColumns, int numRowsPerBlock, int numColumnsPerBlock) { + this.numRows = (long) numRows; + this.numColumns = (long) numColumns; + this.numRowsPerBlock = numRowsPerBlock; + this.numColumnsPerBlock = numColumnsPerBlock; + } + + /** + * Constructor to create a FrameMetadata object based on the number of rows, + * the number of columns, the number of non-zero values, the number of rows + * per block, and the number of columns per block in a frame. + * + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. + * @param numRowsPerBlock + * The number of rows per block in the frame. + * @param numColumnsPerBlock + * The number of columns per block in the frame. 
+ */ + public FrameMetadata(Long numRows, Long numColumns, Long numNonZeros, Integer numRowsPerBlock, + Integer numColumnsPerBlock) { + this.numRows = numRows; + this.numColumns = numColumns; + this.numNonZeros = numNonZeros; + this.numRowsPerBlock = numRowsPerBlock; + this.numColumnsPerBlock = numColumnsPerBlock; + } + + /** + * Constructor to create a FrameMetadata object based on the number of rows, + * the number of columns, the number of non-zero values, the number of rows + * per block, and the number of columns per block in a frame. + * + * @param numRows + * The number of rows in the frame. + * @param numColumns + * The number of columns in the frame. + * @param numNonZeros + * The number of non-zero values in the frame. + * @param numRowsPerBlock + * The number of rows per block in the frame. + * @param numColumnsPerBlock + * The number of columns per block in the frame. + */ + public FrameMetadata(int numRows, int numColumns, int numNonZeros, int numRowsPerBlock, int numColumnsPerBlock) { + this.numRows = (long) numRows; + this.numColumns = (long) numColumns; + this.numNonZeros = (long) numNonZeros; + this.numRowsPerBlock = numRowsPerBlock; + this.numColumnsPerBlock = numColumnsPerBlock; + } + + /** + * Constructor to create a FrameMetadata object based on a + * MatrixCharacteristics object. + * + * @param matrixCharacteristics + * the frame metadata as a MatrixCharacteristics object + */ + public FrameMetadata(MatrixCharacteristics matrixCharacteristics) { + this.numRows = matrixCharacteristics.getRows(); + this.numColumns = matrixCharacteristics.getCols(); + this.numNonZeros = matrixCharacteristics.getNonZeros(); + this.numRowsPerBlock = matrixCharacteristics.getRowsPerBlock(); + this.numColumnsPerBlock = matrixCharacteristics.getColsPerBlock(); + } + + /** + * Constructor to create a FrameMetadata object based on the frame schema + * and a MatrixCharacteristics object. + * + * @param frameSchema + * The frame schema. 
+ * @param matrixCharacteristics + * the frame metadata as a MatrixCharacteristics object + */ + public FrameMetadata(FrameSchema frameSchema, MatrixCharacteristics matrixCharacteristics) { + this.frameSchema = frameSchema; + this.numRows = matrixCharacteristics.getRows(); + this.numColumns = matrixCharacteristics.getCols(); + this.numNonZeros = matrixCharacteristics.getNonZeros(); + this.numRowsPerBlock = matrixCharacteristics.getRowsPerBlock(); + this.numColumnsPerBlock = matrixCharacteristics.getColsPerBlock(); + } + + /** + * Set the FrameMetadata fields based on a MatrixCharacteristics object. + * + * @param matrixCharacteristics + * the frame metadata as a MatrixCharacteristics object + */ + public void setMatrixCharacteristics(MatrixCharacteristics matrixCharacteristics) { + this.numRows = matrixCharacteristics.getRows(); + this.numColumns = matrixCharacteristics.getCols(); + this.numNonZeros = matrixCharacteristics.getNonZeros(); + this.numRowsPerBlock = matrixCharacteristics.getRowsPerBlock(); + this.numColumnsPerBlock = matrixCharacteristics.getColsPerBlock(); + } + + /** + * Obtain the number of rows + * + * @return the number of rows + */ + public Long getNumRows() { + return numRows; + } + + /** + * Set the number of rows + * + * @param numRows + * the number of rows + */ + public void setNumRows(Long numRows) { + this.numRows = numRows; + } + + /** + * Obtain the number of columns + * + * @return the number of columns + */ + public Long getNumColumns() { + return numColumns; + } + + /** + * Set the number of columns + * + * @param numColumns + * the number of columns + */ + public void setNumColumns(Long numColumns) { + this.numColumns = numColumns; + } + + /** + * Obtain the number of non-zero values + * + * @return the number of non-zero values + */ + public Long getNumNonZeros() { + return numNonZeros; + } + + /** + * Set the number of non-zero values + * + * @param numNonZeros + * the number of non-zero values + */ + public void 
setNumNonZeros(Long numNonZeros) { + this.numNonZeros = numNonZeros; + } + + /** + * Obtain the number of rows per block + * + * @return the number of rows per block + */ + public Integer getNumRowsPerBlock() { + return numRowsPerBlock; + } + + /** + * Set the number of rows per block + * + * @param numRowsPerBlock + * the number of rows per block + */ + public void setNumRowsPerBlock(Integer numRowsPerBlock) { + this.numRowsPerBlock = numRowsPerBlock; + } + + /** + * Obtain the number of columns per block + * + * @return the number of columns per block + */ + public Integer getNumColumnsPerBlock() { + return numColumnsPerBlock; + } + + /** + * Set the number of columns per block + * + * @param numColumnsPerBlock + * the number of columns per block + */ + public void setNumColumnsPerBlock(Integer numColumnsPerBlock) { + this.numColumnsPerBlock = numColumnsPerBlock; + } + + /** + * Convert the frame metadata to a MatrixCharacteristics object. If all + * field values are {@code null}, {@code null} is returned. + * + * @return the frame metadata as a MatrixCharacteristics object, or + * {@code null} if all field values are null + */ + public MatrixCharacteristics asMatrixCharacteristics() { + + if ((numRows == null) && (numColumns == null) && (numRowsPerBlock == null) && (numColumnsPerBlock == null) + && (numNonZeros == null)) { + return null; + } + + long nr = (numRows == null) ? -1 : numRows; + long nc = (numColumns == null) ? -1 : numColumns; + int nrpb = (numRowsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numRowsPerBlock; + int ncpb = (numColumnsPerBlock == null) ? MLContextUtil.defaultBlockSize() : numColumnsPerBlock; + long nnz = (numNonZeros == null) ? 
-1 : numNonZeros; + MatrixCharacteristics mc = new MatrixCharacteristics(nr, nc, nrpb, ncpb, nnz); + return mc; + } + + @Override + public String toString() { + return "rows: " + fieldDisplay(numRows) + ", columns: " + fieldDisplay(numColumns) + ", non-zeros: " + + fieldDisplay(numNonZeros) + ", rows per block: " + fieldDisplay(numRowsPerBlock) + + ", columns per block: " + fieldDisplay(numColumnsPerBlock); + } + + private String fieldDisplay(Object field) { + if (field == null) { + return "None"; + } else { + return field.toString(); + } + } + + /** + * Obtain the frame format + * + * @return the frame format + */ + public FrameFormat getFrameFormat() { + return frameFormat; + } + + /** + * Set the frame format + * + * @param frameFormat + * the frame format + */ + public void setFrameFormat(FrameFormat frameFormat) { + this.frameFormat = frameFormat; + } + + /** + * Obtain the frame schema + * + * @return the frame schema + */ + public FrameSchema getFrameSchema() { + return frameSchema; + } + + /** + * Set the frame schema + * + * @param frameSchema + * the frame schema + */ + public void setFrameSchema(FrameSchema frameSchema) { + this.frameSchema = frameSchema; + } + +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/FrameSchema.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/FrameSchema.java b/src/main/java/org/apache/sysml/api/mlcontext/FrameSchema.java new file mode 100644 index 0000000..040d77b --- /dev/null +++ b/src/main/java/org/apache/sysml/api/mlcontext/FrameSchema.java @@ -0,0 +1,128 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.api.mlcontext; + +import java.util.ArrayList; +import java.util.List; + +import org.apache.commons.lang3.StringUtils; +import org.apache.sysml.parser.Expression.ValueType; + +/** + * The frame schema, stored as a list of {@code ValueType} values. + * + */ +public class FrameSchema { + + private List<ValueType> schema = null; + + public FrameSchema() { + } + + /** + * Constructor that specifies the schema as a list of {@code ValueType} + * values. + * + * @param schema + * the frame schema + */ + public FrameSchema(List<ValueType> schema) { + this.schema = schema; + } + + /** + * Constructor that specifies the schema as a comma-separated string. 
+ * + * @param schema + * the frame schema as a string + */ + public FrameSchema(String schema) { + this.schema = schemaStringToListOfValueTypes(schema); + } + + /** + * Obtain the frame schema + * + * @return the frame schema as a list of {@code ValueType} values + */ + public List<ValueType> getSchema() { + return schema; + } + + /** + * Set the frame schema + * + * @param schema + * the frame schema + */ + public void setSchema(List<ValueType> schema) { + this.schema = schema; + } + + /** + * Set the frame schema, specifying the frame schema as a comma-separated + * string + * + * @param schema + * the frame schema as a string + */ + public void setSchemaAsString(String schema) { + this.schema = schemaStringToListOfValueTypes(schema); + } + + /** + * Convert a schema string to a list of {@code ValueType} values + * + * @param schemaString + * the frame schema as a string + * @return the frame schema as a list of {@code ValueType} values + */ + private List<ValueType> schemaStringToListOfValueTypes(String schemaString) { + if (StringUtils.isBlank(schemaString)) { + return null; + } + String[] cols = schemaString.split(","); + List<ValueType> list = new ArrayList<ValueType>(); + for (String col : cols) { + list.add(ValueType.valueOf(col.toUpperCase())); + } + return list; + } + + /** + * Obtain the schema as a comma-separated string + * + * @return the frame schema as a string + */ + public String getSchemaAsString() { + if ((schema == null) || (schema.size() == 0)) { + return null; + } + StringBuilder sb = new StringBuilder(); + for (int i = 0; i < schema.size(); i++) { + ValueType vt = schema.get(i); + sb.append(vt); + if (i + 1 < schema.size()) { + sb.append(","); + } + } + return sb.toString(); + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java index 7feb86a..b0f8432 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextConversionUtil.java @@ -198,16 +198,16 @@ public class MLContextConversionUtil { * name of the variable associated with the frame * @param frameBlock * frame as a FrameBlock - * @param matrixMetadata - * the matrix metadata + * @param frameMetadata + * the frame metadata * @return the {@code FrameBlock} converted to a {@code FrameObject} */ public static FrameObject frameBlockToFrameObject(String variableName, FrameBlock frameBlock, - MatrixMetadata matrixMetadata) { + FrameMetadata frameMetadata) { try { MatrixCharacteristics matrixCharacteristics; - if (matrixMetadata != null) { - matrixCharacteristics = matrixMetadata.asMatrixCharacteristics(); + if (frameMetadata != null) { + matrixCharacteristics = frameMetadata.asMatrixCharacteristics(); } else { matrixCharacteristics = new MatrixCharacteristics(); } @@ -273,16 +273,15 @@ public class MLContextConversionUtil { } /** - * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a - * {@code FrameObject}. + * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a {@code FrameObject}. 
* * @param variableName * name of the variable associated with the frame * @param binaryBlocks - * {@code JavaPairRDD<Long, FrameBlock>} representation - * of a binary-block frame - * @return the {@code JavaPairRDD<Long, FrameBlock>} frame - * converted to a {@code FrameObject} + * {@code JavaPairRDD<Long, FrameBlock>} representation of a + * binary-block frame + * @return the {@code JavaPairRDD<Long, FrameBlock>} frame converted to a + * {@code FrameObject} */ public static FrameObject binaryBlocksToFrameObject(String variableName, JavaPairRDD<Long, FrameBlock> binaryBlocks) { @@ -290,33 +289,32 @@ public class MLContextConversionUtil { } /** - * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a - * {@code FrameObject}. + * Convert a {@code JavaPairRDD<Long, FrameBlock>} to a {@code FrameObject}. * * @param variableName * name of the variable associated with the frame * @param binaryBlocks - * {@code JavaPairRDD<Long, FrameBlock>} representation - * of a binary-block frame - * @param matrixMetadata - * the matrix metadata - * @return the {@code JavaPairRDD<Long, FrameBlock>} frame - * converted to a {@code FrameObject} + * {@code JavaPairRDD<Long, FrameBlock>} representation of a + * binary-block frame + * @param frameMetadata + * the frame metadata + * @return the {@code JavaPairRDD<Long, FrameBlock>} frame converted to a + * {@code FrameObject} */ - public static FrameObject binaryBlocksToFrameObject(String variableName, - JavaPairRDD<Long, FrameBlock> binaryBlocks, MatrixMetadata matrixMetadata) { + public static FrameObject binaryBlocksToFrameObject(String variableName, JavaPairRDD<Long, FrameBlock> binaryBlocks, + FrameMetadata frameMetadata) { MatrixCharacteristics matrixCharacteristics; - if (matrixMetadata != null) { - matrixCharacteristics = matrixMetadata.asMatrixCharacteristics(); + if (frameMetadata != null) { + matrixCharacteristics = frameMetadata.asMatrixCharacteristics(); } else { matrixCharacteristics = new MatrixCharacteristics(); } - 
MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, - OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo); - FrameObject frameObject = new FrameObject(MLContextUtil.scratchSpace() + "/" + "temp_" - + System.nanoTime() + variableName, mtd); + MatrixFormatMetaData mtd = new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo, + InputInfo.BinaryBlockInputInfo); + FrameObject frameObject = new FrameObject( + MLContextUtil.scratchSpace() + "/" + "temp_" + System.nanoTime() + variableName, mtd); frameObject.setRDDHandle(new RDDObject(binaryBlocks, variableName)); return frameObject; } @@ -352,8 +350,8 @@ public class MLContextConversionUtil { if (matrixMetadata == null) { matrixMetadata = new MatrixMetadata(); } - JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlock = MLContextConversionUtil.dataFrameToBinaryBlocks(dataFrame, - matrixMetadata); + JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlock = MLContextConversionUtil + .dataFrameToMatrixBinaryBlocks(dataFrame, matrixMetadata); MatrixObject matrixObject = MLContextConversionUtil.binaryBlocksToMatrixObject(variableName, binaryBlock, matrixMetadata); return matrixObject; @@ -368,9 +366,8 @@ public class MLContextConversionUtil { * the Spark {@code DataFrame} * @return the {@code DataFrame} matrix converted to a converted to a * {@code FrameObject} - * @throws DMLRuntimeException */ - public static FrameObject dataFrameToFrameObject(String variableName, DataFrame dataFrame) throws DMLRuntimeException { + public static FrameObject dataFrameToFrameObject(String variableName, DataFrame dataFrame) { return dataFrameToFrameObject(variableName, dataFrame, null); } @@ -381,28 +378,38 @@ public class MLContextConversionUtil { * name of the variable associated with the frame * @param dataFrame * the Spark {@code DataFrame} - * @param matrixMetadata - * the matrix metadata + * @param frameMetadata + * the frame metadata * @return the {@code DataFrame} frame converted 
to a converted to a * {@code FrameObject} - * @throws DMLRuntimeException */ public static FrameObject dataFrameToFrameObject(String variableName, DataFrame dataFrame, - MatrixMetadata matrixMetadata) throws DMLRuntimeException { - if (matrixMetadata == null) { - matrixMetadata = new MatrixMetadata(); - } + FrameMetadata frameMetadata) { + try { + if (frameMetadata == null) { + frameMetadata = new FrameMetadata(); + } - JavaSparkContext javaSparkContext = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); - boolean containsID = isDataFrameWithIDColumn(matrixMetadata); - MatrixCharacteristics matrixCharacteristics = matrixMetadata.asMatrixCharacteristics(); - JavaPairRDD<Long, FrameBlock> binaryBlock = - FrameRDDConverterUtils.dataFrameToBinaryBlock(javaSparkContext, dataFrame, - matrixCharacteristics, containsID); + JavaSparkContext javaSparkContext = MLContextUtil + .getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); + boolean containsID = isDataFrameWithIDColumn(frameMetadata); + MatrixCharacteristics matrixCharacteristics = frameMetadata.asMatrixCharacteristics(); + if (matrixCharacteristics == null) { + matrixCharacteristics = new MatrixCharacteristics(); + long rows = dataFrame.count(); + int cols = dataFrame.columns().length; + matrixCharacteristics.setDimension(rows, cols); + frameMetadata.setMatrixCharacteristics(matrixCharacteristics); + } + JavaPairRDD<Long, FrameBlock> binaryBlock = FrameRDDConverterUtils.dataFrameToBinaryBlock(javaSparkContext, + dataFrame, matrixCharacteristics, containsID); - FrameObject frameObject = MLContextConversionUtil.binaryBlocksToFrameObject(variableName, binaryBlock, - matrixMetadata); - return frameObject; + FrameObject frameObject = MLContextConversionUtil.binaryBlocksToFrameObject(variableName, binaryBlock, + frameMetadata); + return frameObject; + } catch (DMLRuntimeException e) { + throw new MLContextException("Exception converting DataFrame to FrameObject", e); + } } 
/** @@ -415,8 +422,8 @@ public class MLContextConversionUtil { * {@code JavaPairRDD<MatrixIndexes, * MatrixBlock>} binary-block matrix */ - public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlocks(DataFrame dataFrame) { - return dataFrameToBinaryBlocks(dataFrame, null); + public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToMatrixBinaryBlocks(DataFrame dataFrame) { + return dataFrameToMatrixBinaryBlocks(dataFrame, null); } /** @@ -431,7 +438,7 @@ public class MLContextConversionUtil { * {@code JavaPairRDD<MatrixIndexes, * MatrixBlock>} binary-block matrix */ - public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToBinaryBlocks(DataFrame dataFrame, + public static JavaPairRDD<MatrixIndexes, MatrixBlock> dataFrameToMatrixBinaryBlocks(DataFrame dataFrame, MatrixMetadata matrixMetadata) { determineMatrixFormatIfNeeded(dataFrame, matrixMetadata); @@ -467,6 +474,23 @@ public class MLContextConversionUtil { } /** + * Convert a {@code DataFrame} to a {@code JavaPairRDD<Long, FrameBlock>} + * binary-block frame. + * + * @param dataFrame + * the Spark {@code DataFrame} + * @param frameMetadata + * the frame metadata + * @return the {@code DataFrame} matrix converted to a + * {@code JavaPairRDD<Long, + * FrameBlock>} binary-block frame + */ + public static JavaPairRDD<Long, FrameBlock> dataFrameToFrameBinaryBlocks(DataFrame dataFrame, + FrameMetadata frameMetadata) { + throw new MLContextException("dataFrameToFrameBinaryBlocks is unimplemented"); + } + + /** * If the MatrixFormat of the DataFrame has not been explicitly specified, * attempt to determine the proper MatrixFormat. * @@ -530,6 +554,25 @@ public class MLContextConversionUtil { } /** + * Return whether or not the DataFrame has an ID column. + * + * @param frameMetadata + * the frame metadata + * @return {@code true} if the DataFrame has an ID column, {@code false} + * otherwise. 
+ */ + public static boolean isDataFrameWithIDColumn(FrameMetadata frameMetadata) { + if (frameMetadata == null) { + return false; + } + FrameFormat frameFormat = frameMetadata.getFrameFormat(); + if (frameFormat == null) { + return false; + } + return frameFormat.hasIDColumn(); + } + + /** * Return whether or not the DataFrame is vector-based. * * @param matrixMetadata @@ -645,27 +688,29 @@ public class MLContextConversionUtil { * name of the variable associated with the frame * @param javaRDD * the Java RDD of strings - * @param matrixMetadata - * matrix metadata + * @param frameMetadata + * frame metadata * @return the {@code JavaRDD<String>} converted to a {@code FrameObject} */ public static FrameObject javaRDDStringCSVToFrameObject(String variableName, JavaRDD<String> javaRDD, - MatrixMetadata matrixMetadata) { + FrameMetadata frameMetadata) { JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair()); MatrixCharacteristics matrixCharacteristics; - if (matrixMetadata != null) { - matrixCharacteristics = matrixMetadata.asMatrixCharacteristics(); + if (frameMetadata != null) { + matrixCharacteristics = frameMetadata.asMatrixCharacteristics(); } else { matrixCharacteristics = new MatrixCharacteristics(); } JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction()); - + JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); - FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo)); + FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics, + OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo)); JavaPairRDD<Long, FrameBlock> rdd; try { - rdd = FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, false, ",", false, -1); + rdd = 
FrameRDDConverterUtils.csvToBinaryBlock(jsc, javaPairRDDText, matrixCharacteristics, false, ",", + false, -1); } catch (DMLRuntimeException e) { e.printStackTrace(); return null; @@ -710,30 +755,31 @@ public class MLContextConversionUtil { * name of the variable associated with the frame * @param javaRDD * the Java RDD of strings - * @param matrixMetadata - * matrix metadata + * @param frameMetadata + * frame metadata * @return the {@code JavaRDD<String>} converted to a {@code FrameObject} */ public static FrameObject javaRDDStringIJVToFrameObject(String variableName, JavaRDD<String> javaRDD, - MatrixMetadata matrixMetadata) { + FrameMetadata frameMetadata) { JavaPairRDD<LongWritable, Text> javaPairRDD = javaRDD.mapToPair(new ConvertStringToLongTextPair()); MatrixCharacteristics matrixCharacteristics; - if (matrixMetadata != null) { - matrixCharacteristics = matrixMetadata.asMatrixCharacteristics(); + if (frameMetadata != null) { + matrixCharacteristics = frameMetadata.asMatrixCharacteristics(); } else { matrixCharacteristics = new MatrixCharacteristics(); } - + JavaPairRDD<LongWritable, Text> javaPairRDDText = javaPairRDD.mapToPair(new CopyTextInputFunction()); - + JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); - FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo)); + FrameObject frameObject = new FrameObject(null, new MatrixFormatMetaData(matrixCharacteristics, + OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo)); JavaPairRDD<Long, FrameBlock> rdd; try { List<ValueType> lschema = null; - if(lschema == null) - lschema = Collections.nCopies((int)matrixCharacteristics.getCols(), ValueType.STRING); + if (lschema == null) + lschema = Collections.nCopies((int) matrixCharacteristics.getCols(), ValueType.STRING); rdd = FrameRDDConverterUtils.textCellToBinaryBlock(jsc, javaPairRDDText, 
matrixCharacteristics, lschema); } catch (DMLRuntimeException e) { e.printStackTrace(); @@ -794,15 +840,15 @@ public class MLContextConversionUtil { * name of the variable associated with the frame * @param rdd * the RDD of strings - * @param matrixMetadata + * @param frameMetadata * frame metadata * @return the {@code RDD<String>} converted to a {@code FrameObject} */ public static FrameObject rddStringCSVToFrameObject(String variableName, RDD<String> rdd, - MatrixMetadata matrixMetadata) { + FrameMetadata frameMetadata) { ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class); JavaRDD<String> javaRDD = JavaRDD.fromRDD(rdd, tag); - return javaRDDStringCSVToFrameObject(variableName, javaRDD, matrixMetadata); + return javaRDDStringCSVToFrameObject(variableName, javaRDD, frameMetadata); } /** @@ -832,15 +878,15 @@ public class MLContextConversionUtil { * name of the variable associated with the frame * @param rdd * the RDD of strings - * @param matrixMetadata + * @param frameMetadata * frame metadata * @return the {@code RDD<String>} converted to a {@code FrameObject} */ public static FrameObject rddStringIJVToFrameObject(String variableName, RDD<String> rdd, - MatrixMetadata matrixMetadata) { + FrameMetadata frameMetadata) { ClassTag<String> tag = scala.reflect.ClassTag$.MODULE$.apply(String.class); JavaRDD<String> javaRDD = JavaRDD.fromRDD(rdd, tag); - return javaRDDStringIJVToFrameObject(variableName, javaRDD, matrixMetadata); + return javaRDDStringIJVToFrameObject(variableName, javaRDD, frameMetadata); } /** @@ -898,8 +944,7 @@ public class MLContextConversionUtil { } /** - * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in CSV - * format. + * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in CSV format. 
* * @param frameObject * the {@code FrameObject} @@ -908,7 +953,7 @@ public class MLContextConversionUtil { public static JavaRDD<String> frameObjectToJavaRDDStringCSV(FrameObject frameObject, String delimiter) { List<String> list = frameObjectToListStringCSV(frameObject, delimiter); - JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext)MLContextProxy.getActiveMLContext()); + JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); JavaRDD<String> javaRDDStringCSV = jsc.parallelize(list); return javaRDDStringCSV; } @@ -933,8 +978,7 @@ public class MLContextConversionUtil { } /** - * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in IJV - * format. + * Convert a {@code FrameObject} to a {@code JavaRDD<String>} in IJV format. * * @param frameObject * the {@code FrameObject} @@ -943,7 +987,7 @@ public class MLContextConversionUtil { public static JavaRDD<String> frameObjectToJavaRDDStringIJV(FrameObject frameObject) { List<String> list = frameObjectToListStringIJV(frameObject); - JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext)MLContextProxy.getActiveMLContext()); + JavaSparkContext jsc = MLContextUtil.getJavaSparkContext((MLContext) MLContextProxy.getActiveMLContext()); JavaRDD<String> javaRDDStringIJV = jsc.parallelize(list); return javaRDDStringIJV; } @@ -1343,5 +1387,49 @@ public class MLContextConversionUtil { throw new MLContextException("DMLRuntimeException while converting matrix object to BinaryBlockMatrix", e); } } - + + /** + * Convert a {@code FrameObject} to a {@code BinaryBlockFrame}. 
+ * + * @param frameObject + * the {@code FrameObject} + * @param sparkExecutionContext + * the Spark execution context + * @return the {@code FrameObject} converted to a {@code BinaryBlockFrame} + */ + public static BinaryBlockFrame frameObjectToBinaryBlockFrame(FrameObject frameObject, + SparkExecutionContext sparkExecutionContext) { + try { + @SuppressWarnings("unchecked") + JavaPairRDD<Long, FrameBlock> binaryBlock = (JavaPairRDD<Long, FrameBlock>) sparkExecutionContext + .getRDDHandleForFrameObject(frameObject, InputInfo.BinaryBlockInputInfo); + MatrixCharacteristics matrixCharacteristics = frameObject.getMatrixCharacteristics(); + FrameSchema fs = new FrameSchema(frameObject.getSchema()); + FrameMetadata fm = new FrameMetadata(fs, matrixCharacteristics); + BinaryBlockFrame binaryBlockFrame = new BinaryBlockFrame(binaryBlock, fm); + return binaryBlockFrame; + } catch (DMLRuntimeException e) { + throw new MLContextException("DMLRuntimeException while converting frame object to BinaryBlockFrame", e); + } + } + + /** + * Convert a {@code FrameObject} to a two-dimensional string array. 
+ * + * @param frameObject + * the {@code FrameObject} + * @return the {@code FrameObject} converted to a {@code String[][]} + */ + public static String[][] frameObjectTo2DStringArray(FrameObject frameObject) { + try { + FrameBlock fb = frameObject.acquireRead(); + String[][] frame = DataConverter.convertToStringFrame(fb); + frameObject.release(); + return frame; + } catch (CacheException e) { + throw new MLContextException("CacheException while converting frame object to 2D string array", e); + } catch (DMLRuntimeException e) { + throw new MLContextException("DMLRuntimeException while converting frame object to 2D string array", e); + } + } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/d39865e9/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java index 9813174..566fba1 100644 --- a/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java +++ b/src/main/java/org/apache/sysml/api/mlcontext/MLContextUtil.java @@ -25,7 +25,6 @@ import java.text.DateFormat; import java.text.SimpleDateFormat; import java.util.Date; import java.util.HashMap; -import java.util.LinkedHashMap; import java.util.List; import java.util.Map; import java.util.Map.Entry; @@ -39,14 +38,15 @@ import org.apache.spark.SparkContext; import org.apache.spark.api.java.JavaPairRDD; import org.apache.spark.api.java.JavaRDD; import org.apache.spark.api.java.JavaSparkContext; +import org.apache.spark.mllib.linalg.Vector; import org.apache.spark.rdd.RDD; import org.apache.spark.sql.DataFrame; +import org.apache.spark.sql.Row; import org.apache.sysml.conf.CompilerConfig; import org.apache.sysml.conf.CompilerConfig.ConfigType; import org.apache.sysml.conf.ConfigurationManager; import org.apache.sysml.conf.DMLConfig; import org.apache.sysml.parser.ParseException; 
-import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.controlprogram.LocalVariableMap; import org.apache.sysml.runtime.controlprogram.caching.FrameObject; import org.apache.sysml.runtime.controlprogram.caching.MatrixObject; @@ -76,7 +76,8 @@ public final class MLContextUtil { */ @SuppressWarnings("rawtypes") public static final Class[] COMPLEX_DATA_TYPES = { JavaRDD.class, RDD.class, DataFrame.class, - BinaryBlockMatrix.class, Matrix.class, (new double[][] {}).getClass(), MatrixBlock.class, URL.class }; + BinaryBlockMatrix.class, BinaryBlockFrame.class, Matrix.class, Frame.class, (new double[][] {}).getClass(), + MatrixBlock.class, URL.class }; /** * All data types supported by the MLContext API @@ -157,8 +158,8 @@ public final class MLContextUtil { */ public static void verifySparkVersionSupported(SparkContext sc) { if (!MLContextUtil.isSparkVersionSupported(sc.version())) { - throw new MLContextException("SystemML requires Spark " + MLContext.SYSTEMML_MINIMUM_SPARK_VERSION - + " or greater"); + throw new MLContextException( + "SystemML requires Spark " + MLContext.SYSTEMML_MINIMUM_SPARK_VERSION + " or greater"); } } @@ -201,40 +202,6 @@ public final class MLContextUtil { } /** - * Convenience method to generate a {@code Map<String, Object>} of key/value - * pairs. - * <p> - * Example:<br> - * {@code Map<String, Object> inputMap = MLContextUtil.generateInputMap("A", 1, "B", "two", "C", 3);} - * <br> - * <br> - * This is equivalent to:<br> - * <code>Map<String, Object> inputMap = new LinkedHashMap<String, Object>(){{ - * <br>put("A", 1); - * <br>put("B", "two"); - * <br>put("C", 3); - * <br>}};</code> - * - * @param objs - * List of String/Object pairs - * @return Map of String/Object pairs - * @throws MLContextException - * if the number of arguments is not an even number - */ - public static Map<String, Object> generateInputMap(Object... 
objs) { - int len = objs.length; - if ((len & 1) == 1) { - throw new MLContextException("The number of arguments needs to be an even number"); - } - Map<String, Object> map = new LinkedHashMap<String, Object>(); - int i = 0; - while (i < len) { - map.put((String) objs[i++], objs[i++]); - } - return map; - } - - /** * Verify that the types of input values are supported. * * @param inputs @@ -314,8 +281,8 @@ public final class MLContextUtil { } } if (!supported) { - throw new MLContextException("Input parameter (\"" + parameterName + "\") value type not supported: " - + o.getClass()); + throw new MLContextException( + "Input parameter (\"" + parameterName + "\") value type not supported: " + o.getClass()); } } @@ -412,7 +379,7 @@ public final class MLContextUtil { * @return input in SystemML data representation */ public static Data convertInputType(String parameterName, Object parameterValue) { - return convertInputType(parameterName, parameterValue, null, false); + return convertInputType(parameterName, parameterValue, null); } /** @@ -422,15 +389,16 @@ public final class MLContextUtil { * The name of the input parameter * @param parameterValue * The value of the input parameter - * @param matrixMetadata - * matrix metadata - * @param bFrame - * if input is of type frame + * @param metadata + * matrix/frame metadata * @return input in SystemML data representation */ - public static Data convertInputType(String parameterName, Object parameterValue, MatrixMetadata matrixMetadata, boolean bFrame) { + public static Data convertInputType(String parameterName, Object parameterValue, Metadata metadata) { String name = parameterName; Object value = parameterValue; + boolean hasMetadata = (metadata != null) ? true : false; + boolean hasMatrixMetadata = hasMetadata && (metadata instanceof MatrixMetadata) ? true : false; + boolean hasFrameMetadata = hasMetadata && (metadata instanceof FrameMetadata) ? 
true : false; if (name == null) { throw new MLContextException("Input parameter name is null"); } else if (value == null) { @@ -438,91 +406,138 @@ public final class MLContextUtil { } else if (value instanceof JavaRDD<?>) { @SuppressWarnings("unchecked") JavaRDD<String> javaRDD = (JavaRDD<String>) value; - if(!bFrame) { + + if (hasMatrixMetadata) { + MatrixMetadata matrixMetadata = (MatrixMetadata) metadata; MatrixObject matrixObject; - if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { - matrixObject = MLContextConversionUtil.javaRDDStringIJVToMatrixObject(name, javaRDD, matrixMetadata); + if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) { + matrixObject = MLContextConversionUtil.javaRDDStringIJVToMatrixObject(name, javaRDD, + matrixMetadata); } else { - matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD, matrixMetadata); + matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, javaRDD, + matrixMetadata); } return matrixObject; - } else { + } else if (hasFrameMetadata) { + FrameMetadata frameMetadata = (FrameMetadata) metadata; FrameObject frameObject; - if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { - frameObject = MLContextConversionUtil.javaRDDStringIJVToFrameObject(name, javaRDD, matrixMetadata); + if (frameMetadata.getFrameFormat() == FrameFormat.IJV) { + frameObject = MLContextConversionUtil.javaRDDStringIJVToFrameObject(name, javaRDD, frameMetadata); } else { - frameObject = MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD, matrixMetadata); + frameObject = MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD, frameMetadata); } return frameObject; + } else if (!hasMetadata) { + String firstLine = javaRDD.first(); + boolean isAllNumbers = isCSVLineAllNumbers(firstLine); + if (isAllNumbers) { + MatrixObject matrixObject = MLContextConversionUtil.javaRDDStringCSVToMatrixObject(name, 
javaRDD); + return matrixObject; + } else { + FrameObject frameObject = MLContextConversionUtil.javaRDDStringCSVToFrameObject(name, javaRDD); + return frameObject; + } } + } else if (value instanceof RDD<?>) { @SuppressWarnings("unchecked") RDD<String> rdd = (RDD<String>) value; - if(!bFrame) { + + if (hasMatrixMetadata) { + MatrixMetadata matrixMetadata = (MatrixMetadata) metadata; MatrixObject matrixObject; - if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { + if (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV) { matrixObject = MLContextConversionUtil.rddStringIJVToMatrixObject(name, rdd, matrixMetadata); } else { matrixObject = MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd, matrixMetadata); } return matrixObject; - } else { + } else if (hasFrameMetadata) { + FrameMetadata frameMetadata = (FrameMetadata) metadata; FrameObject frameObject; - if ((matrixMetadata != null) && (matrixMetadata.getMatrixFormat() == MatrixFormat.IJV)) { - frameObject = MLContextConversionUtil.rddStringIJVToFrameObject(name, rdd, matrixMetadata); + if (frameMetadata.getFrameFormat() == FrameFormat.IJV) { + frameObject = MLContextConversionUtil.rddStringIJVToFrameObject(name, rdd, frameMetadata); } else { - frameObject = MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd, matrixMetadata); + frameObject = MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd, frameMetadata); } return frameObject; + } else if (!hasMetadata) { + String firstLine = rdd.first(); + boolean isAllNumbers = isCSVLineAllNumbers(firstLine); + if (isAllNumbers) { + MatrixObject matrixObject = MLContextConversionUtil.rddStringCSVToMatrixObject(name, rdd); + return matrixObject; + } else { + FrameObject frameObject = MLContextConversionUtil.rddStringCSVToFrameObject(name, rdd); + return frameObject; + } } - } else if (value instanceof MatrixBlock) { MatrixBlock matrixBlock = (MatrixBlock) value; MatrixObject matrixObject = 
MLContextConversionUtil.matrixBlockToMatrixObject(name, matrixBlock, - matrixMetadata); + (MatrixMetadata) metadata); return matrixObject; } else if (value instanceof FrameBlock) { FrameBlock frameBlock = (FrameBlock) value; FrameObject frameObject = MLContextConversionUtil.frameBlockToFrameObject(name, frameBlock, - matrixMetadata); + (FrameMetadata) metadata); return frameObject; } else if (value instanceof DataFrame) { DataFrame dataFrame = (DataFrame) value; - if(!bFrame) { - MatrixObject matrixObject = MLContextConversionUtil - .dataFrameToMatrixObject(name, dataFrame, matrixMetadata); + + if (hasMatrixMetadata) { + MatrixObject matrixObject = MLContextConversionUtil.dataFrameToMatrixObject(name, dataFrame, + (MatrixMetadata) metadata); return matrixObject; - } else { - FrameObject frameObject = null; - try { - frameObject = MLContextConversionUtil - .dataFrameToFrameObject(name, dataFrame, matrixMetadata); - } catch (DMLRuntimeException e) { - e.printStackTrace(); - } + } else if (hasFrameMetadata) { + FrameObject frameObject = MLContextConversionUtil.dataFrameToFrameObject(name, dataFrame, + (FrameMetadata) metadata); return frameObject; + } else if (!hasMetadata) { + Row firstRow = dataFrame.first(); + boolean looksLikeMatrix = doesRowLookLikeMatrixRow(firstRow); + if (looksLikeMatrix) { + MatrixObject matrixObject = MLContextConversionUtil.dataFrameToMatrixObject(name, dataFrame); + return matrixObject; + } else { + FrameObject frameObject = MLContextConversionUtil.dataFrameToFrameObject(name, dataFrame); + return frameObject; + } } } else if (value instanceof BinaryBlockMatrix) { BinaryBlockMatrix binaryBlockMatrix = (BinaryBlockMatrix) value; - if (matrixMetadata == null) { - matrixMetadata = binaryBlockMatrix.getMatrixMetadata(); + if (metadata == null) { + metadata = binaryBlockMatrix.getMatrixMetadata(); } JavaPairRDD<MatrixIndexes, MatrixBlock> binaryBlocks = binaryBlockMatrix.getBinaryBlocks(); MatrixObject matrixObject = 
MLContextConversionUtil.binaryBlocksToMatrixObject(name, binaryBlocks, - matrixMetadata); + (MatrixMetadata) metadata); return matrixObject; + } else if (value instanceof BinaryBlockFrame) { + BinaryBlockFrame binaryBlockFrame = (BinaryBlockFrame) value; + if (metadata == null) { + metadata = binaryBlockFrame.getFrameMetadata(); + } + JavaPairRDD<Long, FrameBlock> binaryBlocks = binaryBlockFrame.getBinaryBlocks(); + FrameObject frameObject = MLContextConversionUtil.binaryBlocksToFrameObject(name, binaryBlocks, + (FrameMetadata) metadata); + return frameObject; } else if (value instanceof Matrix) { Matrix matrix = (Matrix) value; MatrixObject matrixObject = matrix.asMatrixObject(); return matrixObject; + } else if (value instanceof Frame) { + Frame frame = (Frame) value; + FrameObject frameObject = frame.asFrameObject(); + return frameObject; } else if (value instanceof double[][]) { double[][] doubleMatrix = (double[][]) value; MatrixObject matrixObject = MLContextConversionUtil.doubleMatrixToMatrixObject(name, doubleMatrix, - matrixMetadata); + (MatrixMetadata) metadata); return matrixObject; } else if (value instanceof URL) { URL url = (URL) value; - MatrixObject matrixObject = MLContextConversionUtil.urlToMatrixObject(name, url, matrixMetadata); + MatrixObject matrixObject = MLContextConversionUtil.urlToMatrixObject(name, url, (MatrixMetadata) metadata); return matrixObject; } else if (value instanceof Integer) { Integer i = (Integer) value; @@ -545,6 +560,56 @@ public final class MLContextUtil { } /** + * If no metadata is supplied for an RDD or JavaRDD, this method can be used + * to determine whether the data appears to be matrix (or a frame) + * + * @param line + * a line of the RDD + * @return {@code true} if all the csv-separated values are numbers, + * {@code false} otherwise + */ + public static boolean isCSVLineAllNumbers(String line) { + if (StringUtils.isBlank(line)) { + return false; + } + String[] parts = line.split(","); + for (int i = 0; i < 
parts.length; i++) { + String part = parts[i].trim(); + try { + Double.parseDouble(part); + } catch (NumberFormatException e) { + return false; + } + } + return true; + } + + /** + * If no metadata is supplied for a DataFrame, this method can be used to + * determine whether the data appears to be a matrix (or a frame) + * + * @param row + * a row in the DataFrame + * @return {@code true} if the row appears to be a matrix row, {@code false} + * otherwise + */ + public static boolean doesRowLookLikeMatrixRow(Row row) { + for (int i = 0; i < row.length(); i++) { + Object object = row.get(i); + if (object instanceof Vector) { + return true; + } + String str = object.toString(); + try { + Double.parseDouble(str); + } catch (NumberFormatException e) { + return false; + } + } + return true; + } + + /** * Return the default matrix block size. * * @return the default matrix block size @@ -559,7 +624,7 @@ public final class MLContextUtil { * @return the lcoation of the scratch space directory */ public static String scratchSpace() { - return ConfigurationManager.getScratchSpace(); + return ConfigurationManager.getScratchSpace(); } /** @@ -738,7 +803,7 @@ public final class MLContextUtil { * the map of inputs * @return the script inputs represented as a String */ - public static String displayInputs(String name, Map<String, Object> map) { + public static String displayInputs(String name, Map<String, Object> map, LocalVariableMap symbolTable) { StringBuilder sb = new StringBuilder(); sb.append(name); sb.append(":\n"); @@ -764,6 +829,11 @@ public final class MLContextUtil { sb.append(" ("); sb.append(type); + if (doesSymbolTableContainMatrixObject(symbolTable, key)) { + sb.append(" as Matrix"); + } else if (doesSymbolTableContainFrameObject(symbolTable, key)) { + sb.append(" as Frame"); + } sb.append(") "); sb.append(key); @@ -890,14 +960,78 @@ public final class MLContextUtil { return sb.toString(); } - public static SparkContext getSparkContext(MLContext mlContext) - { + /** 
+ * Obtain the Spark Context + * + * @param mlContext + * the SystemML MLContext + * @return the Spark Context + */ + public static SparkContext getSparkContext(MLContext mlContext) { return mlContext.getSparkContext(); } - public static JavaSparkContext getJavaSparkContext(MLContext mlContext) - { + /** + * Obtain the Java Spark Context + * + * @param mlContext + * the SystemML MLContext + * @return the Java Spark Context + */ + public static JavaSparkContext getJavaSparkContext(MLContext mlContext) { return new JavaSparkContext(mlContext.getSparkContext()); } + /** + * Determine if the symbol table contains a FrameObject with the given + * variable name. + * + * @param symbolTable + * the LocalVariableMap + * @param variableName + * the variable name + * @return {@code true} if the variable in the symbol table is a + * FrameObject, {@code false} otherwise. + */ + public static boolean doesSymbolTableContainFrameObject(LocalVariableMap symbolTable, String variableName) { + if (symbolTable == null) { + return false; + } + Data data = symbolTable.get(variableName); + if (data == null) { + return false; + } + if (data instanceof FrameObject) { + return true; + } else { + return false; + } + } + + /** + * Determine if the symbol table contains a MatrixObject with the given + * variable name. + * + * @param symbolTable + * the LocalVariableMap + * @param variableName + * the variable name + * @return {@code true} if the variable in the symbol table is a + * MatrixObject, {@code false} otherwise. + */ + public static boolean doesSymbolTableContainMatrixObject(LocalVariableMap symbolTable, String variableName) { + if (symbolTable == null) { + return false; + } + Data data = symbolTable.get(variableName); + if (data == null) { + return false; + } + if (data instanceof MatrixObject) { + return true; + } else { + return false; + } + } + }
