Repository: incubator-systemml Updated Branches: refs/heads/master 4f8648593 -> 6fad65d1d
[MINOR] Added external builtin functions for performing cumsumprod and rowclassmeet Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/6fad65d1 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/6fad65d1 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/6fad65d1 Branch: refs/heads/master Commit: 6fad65d1d5ae4f1e65bdf99a68faf8396f280331 Parents: 4f86485 Author: Niketan Pansare <[email protected]> Authored: Wed Feb 1 14:48:33 2017 -0800 Committer: Niketan Pansare <[email protected]> Committed: Wed Feb 1 14:50:12 2017 -0800 ---------------------------------------------------------------------- .../org/apache/sysml/udf/lib/CumSumProd.java | 243 +++++++++++++++++++ .../org/apache/sysml/udf/lib/RowClassMeet.java | 230 ++++++++++++++++++ 2 files changed, 473 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6fad65d1/src/main/java/org/apache/sysml/udf/lib/CumSumProd.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/udf/lib/CumSumProd.java b/src/main/java/org/apache/sysml/udf/lib/CumSumProd.java new file mode 100644 index 0000000..bdb2231 --- /dev/null +++ b/src/main/java/org/apache/sysml/udf/lib/CumSumProd.java @@ -0,0 +1,243 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sysml.udf.lib; + +import java.io.IOException; +import java.util.Iterator; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.controlprogram.caching.CacheException; +import org.apache.sysml.runtime.matrix.data.IJV; +import org.apache.sysml.runtime.matrix.data.InputInfo; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.OutputInfo; +import org.apache.sysml.udf.FunctionParameter; +import org.apache.sysml.udf.Matrix; +import org.apache.sysml.udf.PackageFunction; +import org.apache.sysml.udf.Scalar; +import org.apache.sysml.udf.Matrix.ValueType; + +/** + * Variant of cumsum: + * Computes following two functions: + * + * cumsum_prod = function (Matrix[double] X, Matrix[double] C, double start) return (Matrix[double] Y) + * # Computes the following recurrence in log-number of steps: + * # Y [1, ] = X [1, ] + C [1, ] * start; + * # Y [i+1, ] = X [i+1, ] + C [i+1, ] * Y [i, ] + * { + * Y = X; P = C; m = nrow(X); k = 1; + * Y [1, ] = Y [1, ] + C [1, ] * start; + * while (k < m) { + * Y [k+1 : m, ] = Y [k+1 : m, ] + Y [1 : m-k, ] * P [k+1 : m, ]; + * P [k+1 : m, ] = P [1 : m-k, ] * P [k+1 : m, ]; + * k = 2 * k; + * } + * } + * + * cumsum_prod_reverse = function (Matrix[double] X, Matrix[double] C, double start) return (Matrix[double] Y) + * # Computes the reverse recurrence in log-number of steps: + * # Y [m, ] = X [m, ] + C [m, ] * start; + * # Y [i-1, ] = X [i-1, ] + C [i-1, ] * Y [i, ] + * { + * Y = X; P = C; m = nrow(X); k = 1; + * Y [m, ] = Y [m, ] + C [m, ] * start; + * while (k < m) { + * Y [1 : m-k, ] = Y [1 : m-k, ] + Y [k+1 : m, ] * P [1 : m-k, ]; + * P [1 : m-k, ] = P [k+1 : m, ] * P [1 : m-k, ]; + * k = 2 * k; + * } + * } + * + * The API of this external built-in function is as follows: + * + * func = externalFunction(matrix[double] X, matrix[double] C, double start, boolean isReverse) return (matrix[double] Y) + * implemented in (classname="org.apache.sysml.udf.lib.CumSumProd",exectype="mem"); + */ +public class CumSumProd extends PackageFunction { + + private static final long serialVersionUID = -7883258699548686065L; + private Matrix ret; + private MatrixBlock retMB, X, C; + private double start; + private boolean isReverse; + + @Override + public int getNumFunctionOutputs() { + return 1; + } + + @Override + public FunctionParameter getFunctionOutput(int pos) { + if(pos == 0) + return ret; + else + throw new RuntimeException("CumSumProd produces only one output"); + } + + @Override + public void execute() { + try { + X = ((Matrix) getFunctionInput(0)).getMatrixObject().acquireRead(); + C = ((Matrix) getFunctionInput(1)).getMatrixObject().acquireRead(); + if(X.getNumRows() != C.getNumRows()) + throw new RuntimeException("Number of rows of X and C should match"); + if( X.getNumColumns() != C.getNumColumns() && C.getNumColumns() != 1 ) + throw new RuntimeException("Incorrect Number of columns of X and C (Expected C to be of same dimension or a vector)"); + start = Double.parseDouble(((Scalar)getFunctionInput(2)).getValue()); + isReverse = Boolean.parseBoolean(((Scalar)getFunctionInput(3)).getValue()); + + numRetRows = X.getNumRows(); + numRetCols = X.getNumColumns(); + allocateOutput(); + + // Copy X to Y + denseBlock = retMB.getDenseBlock(); + if(X.isInSparseFormat()) { + Iterator<IJV> iter = X.getSparseBlockIterator(); + while(iter.hasNext()) { + IJV ijv = iter.next(); + denseBlock[ijv.getI()*numRetCols + ijv.getJ()] = ijv.getV(); + } + } + else { + if(X.getDenseBlock() != null) + System.arraycopy(X.getDenseBlock(), 0, denseBlock, 0, denseBlock.length); + } + + if(!isReverse) { + // Y [1, ] = X [1, ] + C [1, ] * start; + // Y [i+1, ] = X [i+1, ] + C [i+1, ] * Y [i, ] + addCNConstant(0, start); + for(int i = 1; i < numRetRows; i++) { + addC(i, true); + } + } + else { + // Y [m, ] = X [m, ] + C [m, ] * start; + // Y [i-1, ] = X [i-1, ] + C [i-1, ] * Y [i, ] + addCNConstant(numRetRows-1, start); + for(int i = numRetRows - 2; i >= 0; i--) { + addC(i, false); + } + } + + ((Matrix) getFunctionInput(1)).getMatrixObject().release(); + ((Matrix) getFunctionInput(0)).getMatrixObject().release(); + } catch (CacheException e) { + throw new RuntimeException("Error while executing CumSumProd", e); + } + + retMB.recomputeNonZeros(); + try { + retMB.examSparsity(); + ret.setMatrixDoubleArray(retMB, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo); + } catch (DMLRuntimeException e) { + throw new RuntimeException("Error while executing CumSumProd", e); + } catch (IOException e) { + throw new RuntimeException("Error while executing CumSumProd", e); + } + } + + int numRetRows; int numRetCols; + double [] denseBlock; + + private void addCNConstant(int i, double constant) { + boolean isCVector = C.getNumColumns() != ret.getNumCols(); + if(C.isInSparseFormat()) { + Iterator<IJV> iter = C.getSparseBlockIterator(i, i+1); + while(iter.hasNext()) { + IJV ijv = iter.next(); + if(!isCVector) + denseBlock[ijv.getI()*numRetCols + ijv.getJ()] += ijv.getV() * constant; + else { + double val = ijv.getV(); + for(int j = ijv.getI()*numRetCols; j < (ijv.getI()+1)*numRetCols; j++) { + denseBlock[j] += val*constant; + } + } + } + } + else { + double [] CBlk = C.getDenseBlock(); + if(CBlk != null) { + if(!isCVector) { + for(int j = i*numRetCols; j < (i+1)*numRetCols; j++) { + denseBlock[j] += CBlk[j]*constant; + } + } + else { + for(int j = i*numRetCols; j < (i+1)*numRetCols; j++) { + denseBlock[j] += CBlk[i]*constant; + } + } + } + } + } + + private void addC(int i, boolean addPrevRow) { + boolean isCVector = C.getNumColumns() != ret.getNumCols(); + if(C.isInSparseFormat()) { + Iterator<IJV> iter = C.getSparseBlockIterator(i, i+1); + while(iter.hasNext()) { + IJV ijv = iter.next(); + if(!isCVector) { + if(addPrevRow) + denseBlock[ijv.getI()*numRetCols + ijv.getJ()] += ijv.getV() * denseBlock[(ijv.getI()-1)*numRetCols + ijv.getJ()]; + else + denseBlock[ijv.getI()*numRetCols + ijv.getJ()] += ijv.getV() * denseBlock[(ijv.getI()+1)*numRetCols + ijv.getJ()]; + } + else { + double val = ijv.getV(); + for(int j = ijv.getI()*numRetCols; j < (ijv.getI()+1)*numRetCols; j++) { + double val1 = addPrevRow ? denseBlock[(ijv.getI()-1)*numRetCols + ijv.getJ()] : denseBlock[(ijv.getI()+1)*numRetCols + ijv.getJ()]; + denseBlock[j] += val*val1; + } + } + } + } + else { + double [] CBlk = C.getDenseBlock(); + if(CBlk != null) { + if(!isCVector) { + for(int j = i*numRetCols; j < (i+1)*numRetCols; j++) { + double val1 = addPrevRow ? denseBlock[j-numRetCols] : denseBlock[j+numRetCols]; + denseBlock[j] += CBlk[j]*val1; + } + } + else { + for(int j = i*numRetCols; j < (i+1)*numRetCols; j++) { + double val1 = addPrevRow ? denseBlock[j-numRetCols] : denseBlock[j+numRetCols]; + denseBlock[j] += CBlk[i]*val1; + } + } + } + } + } + + private void allocateOutput() { + String dir = createOutputFilePathAndName( "TMP" ); + ret = new Matrix( dir, numRetRows, numRetCols, ValueType.Double ); + retMB = new MatrixBlock((int) numRetRows, (int) numRetCols, false); + retMB.allocateDenseBlock(); + } + + + +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/6fad65d1/src/main/java/org/apache/sysml/udf/lib/RowClassMeet.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/udf/lib/RowClassMeet.java b/src/main/java/org/apache/sysml/udf/lib/RowClassMeet.java new file mode 100644 index 0000000..d24d0e8 --- /dev/null +++ b/src/main/java/org/apache/sysml/udf/lib/RowClassMeet.java @@ -0,0 +1,230 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.sysml.udf.lib; + +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.Iterator; +import java.util.Map.Entry; +import java.util.TreeMap; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.controlprogram.caching.CacheException; +import org.apache.sysml.runtime.matrix.data.IJV; +import org.apache.sysml.runtime.matrix.data.InputInfo; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.OutputInfo; +import org.apache.sysml.udf.FunctionParameter; +import org.apache.sysml.udf.Matrix; +import org.apache.sysml.udf.PackageFunction; +import org.apache.sysml.udf.Matrix.ValueType; + +/** + * Performs following operation: + * # Computes the intersection ("meet") of equivalence classes for + * # each row of A and B, excluding 0-valued cells. + * # INPUT: + * # A, B = matrices whose rows contain that row's class labels; + * # for each i, rows A [i, ] and B [i, ] define two + * # equivalence relations on some of the columns, which + * # we want to intersect + * # A [i, j] == A [i, k] != 0 if and only if (j ~ k) as defined + * # by row A [i, ]; + * # A [i, j] == 0 means that j is excluded by A [i, ] + * # B [i, j] is analogous + * # NOTE 1: Either nrow(A) == nrow(B), or exactly one of A or B + * # has one row that "applies" to each row of the other matrix. + * # NOTE 2: If ncol(A) != ncol(B), we pad extra 0-columns up to + * # max (ncol(A), ncol(B)). + * # OUTPUT: + * # Both C and N have the same size as (the max of) A and B. + * # C = matrix whose rows contain class labels that represent + * # the intersection (coarsest common refinement) of the + * # corresponding rows of A and B. + * # C [i, j] == C [i, k] != 0 if and only if (j ~ k) as defined + * # by both A [i, ] and B [j, ] + * # C [i, j] == 0 if and only if A [i, j] == 0 or B [i, j] == 0 + * # Additionally, we guarantee that non-0 labels in C [i, ] + * # will be integers from 1 to max (C [i, ]) without gaps. + * # For A and B the labels can be arbitrary. + * # N = matrix with class-size information for C-cells + * # N [i, j] = count of {C [i, k] | C [i, j] == C [i, k] != 0} + * + */ +public class RowClassMeet extends PackageFunction { + + private static final long serialVersionUID = 1L; + private Matrix CMat, NMat; + private MatrixBlock A, B, C, N; + private int nr, nc; + + @Override + public int getNumFunctionOutputs() { + return 2; + } + + @Override + public FunctionParameter getFunctionOutput(int pos) { + if(pos == 0) + return CMat; + else if(pos == 1) + return NMat; + else + throw new RuntimeException("RowClassMeet produces only one output"); + } + + + public class ClassLabels { + public double aVal; + public double bVal; + public ClassLabels(double aVal, double bVal) { + this.aVal = aVal; + this.bVal = bVal; + } + } + + public class ClassLabelComparator implements Comparator<ClassLabels> { + Integer tmp1, tmp2; + @Override + public int compare(ClassLabels o1, ClassLabels o2) { + if(o1.aVal != o2.aVal) { + tmp1 = (int) o1.aVal; + tmp2 = (int) o2.aVal; + } + else { + tmp1 = (int) o1.bVal; + tmp2 = (int) o2.bVal; + } + return tmp1.compareTo(tmp2); + } + } + + double [] getRow(MatrixBlock B, double [] bRow, int i) { + if(B.getNumRows() == 1) + i = 0; + Arrays.fill(bRow, 0); + if(B.isInSparseFormat()) { + Iterator<IJV> iter = B.getSparseBlockIterator(i, i+1); + while(iter.hasNext()) { + IJV ijv = iter.next(); + bRow[ijv.getJ()] = ijv.getV(); + } + } + else { + double [] denseBlk = B.getDenseBlock(); + if(denseBlk != null) + System.arraycopy(denseBlk, i*B.getNumColumns(), bRow, 0, B.getNumColumns()); + } + return bRow; + } + + @Override + public void execute() { + try { + A = ((Matrix) getFunctionInput(0)).getMatrixObject().acquireRead(); + B = ((Matrix) getFunctionInput(1)).getMatrixObject().acquireRead(); + nr = Math.max(A.getNumRows(), B.getNumRows()); + nc = Math.max(A.getNumColumns(), B.getNumColumns()); + + double [] bRow = new double[B.getNumColumns()]; + CMat = new Matrix( createOutputFilePathAndName( "TMP" ), nr, nc, ValueType.Double ); + C = new MatrixBlock(nr, nc, false); + C.allocateDenseBlock(); + NMat = new Matrix( createOutputFilePathAndName( "TMP" ), nr, nc, ValueType.Double ); + N = new MatrixBlock(nr, nc, false); + N.allocateDenseBlock(); + + double [] cBlk = C.getDenseBlock(); + double [] nBlk = N.getDenseBlock(); + + if(B.getNumRows() == 1) + getRow(B, bRow, 0); + + for(int i = 0; i < A.getNumRows(); i++) { + if(B.getNumRows() != 1) + getRow(B, bRow, i); + + // Create class labels + TreeMap<ClassLabels, ArrayList<Integer>> classLabelMapping = new TreeMap<ClassLabels, ArrayList<Integer>>(new ClassLabelComparator()); + if(A.isInSparseFormat()) { + Iterator<IJV> iter = A.getSparseBlockIterator(i, i+1); + while(iter.hasNext()) { + IJV ijv = iter.next(); + int j = ijv.getJ(); + double aVal = ijv.getV(); + if(aVal != 0 && bRow[j] != 0) { + ClassLabels key = new ClassLabels(aVal, bRow[j]); + if(!classLabelMapping.containsKey(key)) + classLabelMapping.put(key, new ArrayList<Integer>()); + classLabelMapping.get(key).add(j); + } + } + } + else { + double [] denseBlk = A.getDenseBlock(); + if(denseBlk != null) { + int offset = i*A.getNumColumns(); + for(int j = 0; j < A.getNumColumns(); j++) { + double aVal = denseBlk[offset + j]; + if(aVal != 0 && bRow[j] != 0) { + ClassLabels key = new ClassLabels(aVal, bRow[j]); + if(!classLabelMapping.containsKey(key)) + classLabelMapping.put(key, new ArrayList<Integer>()); + classLabelMapping.get(key).add(j); + } + } + } + } + + + int labelID = 1; + for(Entry<ClassLabels, ArrayList<Integer>> entry : classLabelMapping.entrySet()) { + double nVal = entry.getValue().size(); + for(Integer j : entry.getValue()) { + nBlk[i*nc + j] = nVal; + cBlk[i*nc + j] = labelID; + } + labelID++; + } + } + + ((Matrix) getFunctionInput(0)).getMatrixObject().release(); + ((Matrix) getFunctionInput(1)).getMatrixObject().release(); + } catch (CacheException e) { + throw new RuntimeException("Error while executing RowClassMeet", e); + } + + try { + C.recomputeNonZeros(); + C.examSparsity(); + CMat.setMatrixDoubleArray(C, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo); + N.recomputeNonZeros(); + N.examSparsity(); + NMat.setMatrixDoubleArray(N, OutputInfo.BinaryBlockOutputInfo, InputInfo.BinaryBlockInputInfo); + } catch (DMLRuntimeException e) { + throw new RuntimeException("Error while executing RowClassMeet", e); + } catch (IOException e) { + throw new RuntimeException("Error while executing RowClassMeet", e); + } + } + + +}
