Repository: incubator-systemml Updated Branches: refs/heads/master 129f966dd -> 6608d0ffc
[SYSTEMML-561] New cp frame left indexing operations, tests/cleanup Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/ba29d2d0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/ba29d2d0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/ba29d2d0 Branch: refs/heads/master Commit: ba29d2d05a03e2dc588ff1f060e280ab673228dd Parents: 129f966 Author: Matthias Boehm <[email protected]> Authored: Sun Apr 3 21:34:48 2016 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sun Apr 3 21:34:48 2016 -0700 ---------------------------------------------------------------------- .../cp/FrameIndexingCPInstruction.java | 27 +++ .../runtime/io/FrameReaderBinaryBlock.java | 17 +- .../sysml/runtime/matrix/data/FrameBlock.java | 37 +++- .../functions/jmlc/FrameLeftIndexingTest.java | 173 +++++++++++++++++++ src/test/scripts/functions/jmlc/transform5.dml | 47 +++++ 5 files changed, 280 insertions(+), 21 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ba29d2d0/src/main/java/org/apache/sysml/runtime/instructions/cp/FrameIndexingCPInstruction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/instructions/cp/FrameIndexingCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/FrameIndexingCPInstruction.java index 6c2ed64..7443503 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/FrameIndexingCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/FrameIndexingCPInstruction.java @@ -19,6 +19,8 @@ package org.apache.sysml.runtime.instructions.cp; +import org.apache.sysml.parser.Expression.DataType; +import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.DMLRuntimeException; import org.apache.sysml.runtime.DMLUnsupportedOperationException; import org.apache.sysml.runtime.controlprogram.context.ExecutionContext; @@ -56,6 +58,31 @@ public final class FrameIndexingCPInstruction extends IndexingCPInstruction //unpin output ec.setFrameOutput(output.getName(), out); } + //left indexing + else if ( opcode.equalsIgnoreCase("leftIndex")) + { + FrameBlock lin = ec.getFrameInput(input1.getName()); + FrameBlock out = null; + + if(input2.getDataType() == DataType.FRAME) { //FRAME<-FRAME + FrameBlock rin = ec.getFrameInput(input2.getName()); + out = lin.leftIndexingOperations(rin, ixrange, new FrameBlock()); + ec.releaseFrameInput(input2.getName()); + } + else { //FRAME<-SCALAR + if(!ixrange.isScalar()) + throw new DMLRuntimeException("Invalid index range of scalar leftindexing: "+ixrange.toString()+"." ); + ScalarObject scalar = ec.getScalarInput(input2.getName(), ValueType.DOUBLE, input2.isLiteral()); + out = new FrameBlock(lin); + out.set((int)ixrange.rowStart, (int)ixrange.colStart, scalar.getStringValue()); + } + + //unpin lhs input + ec.releaseFrameInput(input1.getName()); + + //unpin output + ec.setFrameOutput(output.getName(), out); + } else throw new DMLRuntimeException("Invalid opcode (" + opcode +") encountered in FrameIndexingCPInstruction."); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ba29d2d0/src/main/java/org/apache/sysml/runtime/io/FrameReaderBinaryBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/io/FrameReaderBinaryBlock.java b/src/main/java/org/apache/sysml/runtime/io/FrameReaderBinaryBlock.java index 1c224d8..b24f6c3 100644 --- a/src/main/java/org/apache/sysml/runtime/io/FrameReaderBinaryBlock.java +++ b/src/main/java/org/apache/sysml/runtime/io/FrameReaderBinaryBlock.java @@ -104,8 +104,7 @@ public class FrameReaderBinaryBlock extends FrameReader try { //note: next(key, value) does not yet exploit the given serialization classes, record reader does but is generally slower. - while( reader.next(key, value) ) - { + while( reader.next(key, value) ) { int row_offset = (int)(key.getRowIndex()-1)*ConfigurationManager.getBlocksize(); int col_offset = (int)(key.getColumnIndex()-1)*ConfigurationManager.getBlocksize(); @@ -113,24 +112,18 @@ public class FrameReaderBinaryBlock extends FrameReader int cols = value.getNumColumns(); //bound check per block - if( row_offset + rows < 0 || row_offset + rows > rlen || col_offset + cols<0 || col_offset + cols > clen ) - { + if( row_offset + rows < 0 || row_offset + rows > rlen || col_offset + cols<0 || col_offset + cols > clen ) { throw new IOException("Frame block ["+(row_offset+1)+":"+(row_offset+rows)+","+(col_offset+1)+":"+(col_offset+cols)+"] " + "out of overall frame range [1:"+rlen+",1:"+clen+"]."); } - dest.copy( 0, rows-1, - 0, cols-1, - value, - row_offset); + dest.copy( row_offset, row_offset+rows-1, + 0, cols-1, value); } } - finally - { + finally { IOUtilFunctions.closeSilently(reader); } } - } - } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ba29d2d0/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java index c2afe33..5a4913b 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java @@ -406,6 +406,14 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable /////// // indexing and append operations + public FrameBlock leftIndexingOperations(FrameBlock rhsFrame, IndexRange ixrange, FrameBlock ret) + throws DMLRuntimeException + { + return leftIndexingOperations(rhsFrame, + (int)ixrange.rowStart, (int)ixrange.rowEnd, + (int)ixrange.colStart, (int)ixrange.colEnd, ret); + } + /** * * @param rhsFrame @@ -496,16 +504,16 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable for( int j=cl; j<=cu; j++ ) { ret._schema.add(_schema.get(j)); ret._colnames.add(_colnames.get(j)); - } - + } ret._numRows = ru-rl+1; + //copy output data if(ret._coldata.size() == 0) - //copy output data for( int j=cl; j<=cu; j++ ) ret._coldata.add(_coldata.get(j).slice(rl,ru)); else - ret.copy(rl, ru, cl, cu, this, 0); + for( int j=cl; j<=cu; j++ ) + ret._coldata.get(j-cl).set(0, ru-rl+1, _coldata.get(j), rl); return ret; } @@ -575,8 +583,11 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable return ret; } - public void copy(FrameBlock src) - { + /** + * + * @param src + */ + public void copy(FrameBlock src) { //allocate ensureAllocatedColumns(src.getNumRows()); @@ -585,15 +596,23 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable _coldata.get(i).set(0, src.getNumRows()-1, src._coldata.get(i)); } - - public void copy(int rl, int ru, int cl, int cu, FrameBlock src, int rlDest) + /** + * + * @param rl + * @param ru + * @param cl + * @param cu + * @param src + * @throws DMLRuntimeException + */ + public void copy(int rl, int ru, int cl, int cu, FrameBlock src) throws DMLRuntimeException { ensureAllocatedColumns(ru-rl+1); //copy values for( int i=cl; i<=cu; i++ ) - _coldata.get(i).set(rlDest, rlDest+(ru-rl), src._coldata.get(i), rl); + _coldata.get(i).set(rl, ru, src._coldata.get(i-cl)); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ba29d2d0/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameLeftIndexingTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameLeftIndexingTest.java b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameLeftIndexingTest.java new file mode 100644 index 0000000..b1c91df --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/jmlc/FrameLeftIndexingTest.java @@ -0,0 +1,173 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.jmlc; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.HashMap; + +import org.junit.Assert; +import org.junit.Test; +import org.apache.sysml.api.DMLException; +import org.apache.sysml.api.jmlc.Connection; +import org.apache.sysml.api.jmlc.PreparedScript; +import org.apache.sysml.api.jmlc.ResultVariables; +import org.apache.sysml.runtime.controlprogram.parfor.stat.Timing; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.integration.TestConfiguration; +import org.apache.sysml.test.utils.TestUtils; + +/** + * + * + */ +public class FrameLeftIndexingTest extends AutomatedTestBase +{ + private final static String TEST_NAME1 = "transform5"; + private final static String TEST_DIR = "functions/jmlc/"; + private final static String TEST_CLASS_DIR = TEST_DIR + FrameLeftIndexingTest.class.getSimpleName() + "/"; + + private final static int rows = 700; + private final static int cols = 3; + + private final static int nRuns = 2; + + private final static double sparsity1 = 0.7; + private final static double sparsity2 = 0.1; + + + @Override + public void setUp() { + addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME1, new String[] { "F2" }) ); + } + + @Test + public void testJMLCTransformDense() throws IOException { + runJMLCReuseTest(TEST_NAME1, false, false); + } + + @Test + public void testJMLCTransformSparse() throws IOException { + runJMLCReuseTest(TEST_NAME1, true, false); + } + + @Test + public void testJMLCTransformDenseReuse() throws IOException { + runJMLCReuseTest(TEST_NAME1, false, true); + } + + @Test + public void testJMLCTransformSparseReuse() throws IOException { + runJMLCReuseTest(TEST_NAME1, true, true); + } + + /** + * + * @param sparseM1 + * @param sparseM2 + * @param instType + * @throws IOException + */ + private void runJMLCReuseTest( String testname, boolean sparse, boolean modelReuse ) + throws IOException + { + String TEST_NAME = testname; + + TestConfiguration config = getTestConfiguration(TEST_NAME); + loadTestConfiguration(config); + + //generate inputs + double[][] Fd = TestUtils.round(getRandomMatrix(rows, cols, 0.51, 7.49, sparse?sparsity2:sparsity1, 1234)); + String[][] F1s = FrameTransformTest.createFrameData(Fd); + String[][] Ms = FrameTransformTest.createRecodeMaps(F1s); + + //run DML via JMLC + ArrayList<String[][]> F2set = execDMLScriptviaJMLC( TEST_NAME, F1s, Ms, modelReuse ); + + //check correct result + for( String[][] data : F2set ) + for( int i=0; i<F1s.length; i++ ) + for( int j=0; j<F1s[i].length; j++ ) + Assert.assertEquals("Wrong result: "+data[i][j]+".", data[i][j], F1s[i][j]); + } + + /** + * + * @param X + * @return + * @throws DMLException + * @throws IOException + */ + private ArrayList<String[][]> execDMLScriptviaJMLC( String testname, String[][] F1, String[][] M, boolean modelReuse) + throws IOException + { + Timing time = new Timing(true); + + ArrayList<String[][]> ret = new ArrayList<String[][]>(); + + //establish connection to SystemML + Connection conn = new Connection(); + + try + { + //prepare input arguments + HashMap<String,String> args = new HashMap<String,String>(); + args.put("$TRANSFORM_SPEC1", "{ \"ids\": true ,\"recode\": [ 1, 2] }"); + args.put("$TRANSFORM_SPEC2", "{ \"ids\": true ,\"recode\": [ 1] }"); + + //read and precompile script + String script = conn.readScript(SCRIPT_DIR + TEST_DIR + testname + ".dml"); + PreparedScript pstmt = conn.prepareScript(script, args, new String[]{"F1","M"}, new String[]{"F2"}, false); + + if( modelReuse ) + pstmt.setFrame("M", M, true); + + //execute script multiple times + for( int i=0; i<nRuns; i++ ) + { + //bind input parameters + if( !modelReuse ) + pstmt.setFrame("M", M); + pstmt.setFrame("F1", F1); + + //execute script + ResultVariables rs = pstmt.executeScript(); + + //get output parameter + String[][] Y = rs.getFrame("F2"); + ret.add(Y); //keep result for comparison + } + } + catch(Exception ex) + { + ex.printStackTrace(); + throw new IOException(ex); + } + finally + { + if( conn != null ) + conn.close(); + } + + System.out.println("JMLC scoring w/ "+nRuns+" runs in "+time.stop()+"ms."); + + return ret; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/ba29d2d0/src/test/scripts/functions/jmlc/transform5.dml ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/jmlc/transform5.dml b/src/test/scripts/functions/jmlc/transform5.dml new file mode 100644 index 0000000..569e715 --- /dev/null +++ b/src/test/scripts/functions/jmlc/transform5.dml @@ -0,0 +1,47 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +#read data frame and meta data +F1 = read($F1, data_type="frame", format="csv"); #new data +M = read($M, data_type="frame", format="csv"); #existing recode maps +specJson1 = $TRANSFORM_SPEC1 +specJson2 = $TRANSFORM_SPEC2 + +#frame indexing +F11 = F1[,1:2]; +F12 = F1[,3]; +M1 = M[,1:2]; +M2 = M[,3]; + +X1 = transformapply(target=F11, meta=M1, spec=specJson1); +X2 = transformapply(target=F12, meta=M2, spec=specJson2); + +X1 = X1 * (X1!=77.7); +X2 = X2 * (X2!=77.7); + +X1 = append(X1, matrix(0, rows=nrow(X1), cols=1)); +F2 = transformdecode(target=X1, meta=M1, spec=specJson1); +F22 = transformdecode(target=X2, meta=M2, spec=specJson2); + +#frame leftindexing +F2[,3] = F22; + +write(F2, $F2);
