Repository: incubator-systemml Updated Branches: refs/heads/master ae71d00ed -> 474050653
[SYSTEMML-1656] Fix BLAS integration (corrupted matrix block apis) The dispatching between operations over uncompressed or compressed matrix blocks is realized via late binding. The recently added BLAS integration introduced additional matrix block APIs without overriding them for compressed matrix blocks. This corrupted, for example, matrix-vector operations over compressed matrices as they are mistakenly routed to uncompressed operations. This patch fixes this issue by removing these unnecessary API extensions and simplifying the CP aggregate binary instruction to avoid the impression that all compressed matrices are handled through the vector-matrix branch. Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/47405065 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/47405065 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/47405065 Branch: refs/heads/master Commit: 474050653b8bf598ef19b8e60945b42c43f9fcb0 Parents: ae71d00 Author: Matthias Boehm <mboe...@gmail.com> Authored: Thu Jun 1 14:10:21 2017 -0700 Committer: Matthias Boehm <mboe...@gmail.com> Committed: Thu Jun 1 15:00:23 2017 -0700 ---------------------------------------------------------------------- .../cp/AggregateBinaryCPInstruction.java | 19 +-- .../sysml/runtime/matrix/data/MatrixBlock.java | 16 +- .../functions/compress/CompressedL2SVM.java | 148 +++++++++++++++++++ .../functions/compress/CompressedLinregCG.java | 24 ++- src/test/scripts/functions/compress/L2SVM.R | 108 ++++++++++++++ 5 files changed, 275 insertions(+), 40 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/47405065/src/main/java/org/apache/sysml/runtime/instructions/cp/AggregateBinaryCPInstruction.java ---------------------------------------------------------------------- diff --git 
a/src/main/java/org/apache/sysml/runtime/instructions/cp/AggregateBinaryCPInstruction.java b/src/main/java/org/apache/sysml/runtime/instructions/cp/AggregateBinaryCPInstruction.java index 3bcdbdb..359728f 100644 --- a/src/main/java/org/apache/sysml/runtime/instructions/cp/AggregateBinaryCPInstruction.java +++ b/src/main/java/org/apache/sysml/runtime/instructions/cp/AggregateBinaryCPInstruction.java @@ -31,7 +31,6 @@ import org.apache.sysml.runtime.matrix.data.MatrixBlock; import org.apache.sysml.runtime.matrix.operators.AggregateBinaryOperator; import org.apache.sysml.runtime.matrix.operators.AggregateOperator; import org.apache.sysml.runtime.matrix.operators.Operator; -import org.apache.sysml.utils.NativeHelper; public class AggregateBinaryCPInstruction extends BinaryCPInstruction { @@ -72,20 +71,16 @@ public class AggregateBinaryCPInstruction extends BinaryCPInstruction { //get inputs MatrixBlock matBlock1 = ec.getMatrixInput(input1.getName()); - MatrixBlock matBlock2 = ec.getMatrixInput(input2.getName()); + MatrixBlock matBlock2 = ec.getMatrixInput(input2.getName()); + + //compute matrix multiplication + AggregateBinaryOperator ab_op = (AggregateBinaryOperator) _optr; + MatrixBlock main = (matBlock2 instanceof CompressedMatrixBlock) ? 
matBlock2 : matBlock1; + MatrixBlock ret = (MatrixBlock) main.aggregateBinaryOperations(matBlock1, matBlock2, new MatrixBlock(), ab_op); - //compute matrix multiplication - AggregateBinaryOperator ab_op = (AggregateBinaryOperator) _optr; - MatrixBlock soresBlock = null; - if( matBlock2 instanceof CompressedMatrixBlock ) - soresBlock = (MatrixBlock) (matBlock2.aggregateBinaryOperations(matBlock1, matBlock2, new MatrixBlock(), ab_op)); - else { - soresBlock = (MatrixBlock) (matBlock1.aggregateBinaryOperations(matBlock1, matBlock2, new MatrixBlock(), ab_op, NativeHelper.isNativeLibraryLoaded())); - } - //release inputs/outputs ec.releaseMatrixInput(input1.getName()); ec.releaseMatrixInput(input2.getName()); - ec.setMatrixOutput(output.getName(), soresBlock); + ec.setMatrixOutput(output.getName(), ret); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/47405065/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java index ac66241..780c98b 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/MatrixBlock.java @@ -4894,20 +4894,10 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab public MatrixValue aggregateBinaryOperations(MatrixIndexes m1Index, MatrixValue m1Value, MatrixIndexes m2Index, MatrixValue m2Value, MatrixValue result, AggregateBinaryOperator op ) throws DMLRuntimeException { - return aggregateBinaryOperations(m1Value, m2Value, result, op, NativeHelper.isNativeLibraryLoaded()); + return aggregateBinaryOperations(m1Value, m2Value, result, op); } - public MatrixValue aggregateBinaryOperations(MatrixIndexes m1Index, MatrixValue m1Value, MatrixIndexes m2Index, MatrixValue m2Value, - MatrixValue result, 
AggregateBinaryOperator op, boolean enableNativeBLAS ) throws DMLRuntimeException - { - return aggregateBinaryOperations(m1Value, m2Value, result, op, enableNativeBLAS); - } - - public MatrixValue aggregateBinaryOperations(MatrixValue m1Value, MatrixValue m2Value, MatrixValue result, AggregateBinaryOperator op) throws DMLRuntimeException { - return aggregateBinaryOperations(m1Value, m2Value, result, op, NativeHelper.isNativeLibraryLoaded()); - } - - public MatrixValue aggregateBinaryOperations(MatrixValue m1Value, MatrixValue m2Value, MatrixValue result, AggregateBinaryOperator op, boolean nativeMatMult) + public MatrixValue aggregateBinaryOperations(MatrixValue m1Value, MatrixValue m2Value, MatrixValue result, AggregateBinaryOperator op) throws DMLRuntimeException { //check input types, dimensions, configuration @@ -4933,7 +4923,7 @@ public class MatrixBlock extends MatrixValue implements CacheBlock, Externalizab ret.reset(rl, cl, sp.sparse, sp.estimatedNonZeros); //compute matrix multiplication (only supported binary aggregate operation) - if( nativeMatMult ) + if( NativeHelper.isNativeLibraryLoaded() ) LibMatrixNative.matrixMult(m1, m2, ret, op.getNumThreads()); else if( op.getNumThreads() > 1 ) LibMatrixMult.matrixMult(m1, m2, ret, op.getNumThreads()); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/47405065/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedL2SVM.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedL2SVM.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedL2SVM.java new file mode 100644 index 0000000..9dab4a6 --- /dev/null +++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedL2SVM.java @@ -0,0 +1,148 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.test.integration.functions.compress; + +import java.io.File; +import java.util.HashMap; + +import org.apache.sysml.api.DMLScript; +import org.apache.sysml.api.DMLScript.RUNTIME_PLATFORM; +import org.apache.sysml.lops.LopProperties.ExecType; +import org.apache.sysml.runtime.compress.CompressedMatrixBlock; +import org.apache.sysml.runtime.matrix.data.MatrixValue.CellIndex; +import org.apache.sysml.test.integration.AutomatedTestBase; +import org.apache.sysml.test.integration.TestConfiguration; +import org.apache.sysml.test.utils.TestUtils; +import org.junit.Test; + +/** + * + */ +public class CompressedL2SVM extends AutomatedTestBase +{ + private final static String TEST_NAME1 = "L2SVM"; + private final static String TEST_DIR = "functions/compress/"; + private final static String TEST_CONF = "SystemML-config-compress.xml"; + private final static File TEST_CONF_FILE = new File(SCRIPT_DIR + TEST_DIR, TEST_CONF); + + private final static double eps = 1e-4; + + private final static int rows = 1468; + private final static int cols = 980; + + private final static double sparsity1 = 0.7; //dense + private final static double sparsity2 = 0.1; //sparse + + private final static int intercept = 0; + private final static double epsilon = 0.000000001; + 
private final static double maxiter = 10; + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_DIR, TEST_NAME1, new String[] { "w" })); + } + + @Test + public void testL2SVMDenseCP() { + runL2SVMTest(TEST_NAME1, false, ExecType.CP); + } + + @Test + public void testL2SVMSparseCP() { + runL2SVMTest(TEST_NAME1, true, ExecType.CP); + } + + @Test + public void testL2SVMDenseSP() { + runL2SVMTest(TEST_NAME1, false, ExecType.SPARK); + } + + @Test + public void testL2SVMSparseSP() { + runL2SVMTest(TEST_NAME1, true, ExecType.SPARK); + } + + /** + * + * @param sparseM1 + * @param sparseM2 + * @param instType + */ + private void runL2SVMTest( String testname,boolean sparse, ExecType instType) + { + //rtplatform for MR + RUNTIME_PLATFORM platformOld = rtplatform; + switch( instType ){ + case MR: rtplatform = RUNTIME_PLATFORM.HADOOP; break; + case SPARK: rtplatform = RUNTIME_PLATFORM.SPARK; break; + default: rtplatform = RUNTIME_PLATFORM.HYBRID_SPARK; break; + } + + boolean sparkConfigOld = DMLScript.USE_LOCAL_SPARK_CONFIG; + if( rtplatform == RUNTIME_PLATFORM.HYBRID_SPARK || rtplatform == RUNTIME_PLATFORM.SPARK ) + DMLScript.USE_LOCAL_SPARK_CONFIG = true; + + try + { + String TEST_NAME = testname; + TestConfiguration config = getTestConfiguration(TEST_NAME); + loadTestConfiguration(config); + + fullDMLScriptName = "scripts/algorithms/l2-svm.dml"; + programArgs = new String[]{ "-explain", "-stats", "-nvargs", "X="+input("X"), "Y="+input("Y"), + "icpt="+String.valueOf(intercept), "tol="+String.valueOf(epsilon), "reg=0.001", + "maxiter="+String.valueOf(maxiter), "model="+output("w"), "Log= "}; + + rCmd = getRCmd(inputDir(), String.valueOf(intercept),String.valueOf(epsilon), + String.valueOf(maxiter), expectedDir()); + + //generate actual datasets + double[][] X = getRandomMatrix(rows, cols, 0, 1, sparse?sparsity2:sparsity1, 714); + writeInputMatrixWithMTD("X", X, true); + double[][] y = 
TestUtils.round(getRandomMatrix(rows, 1, 0, 1, 1.0, 136)); + writeInputMatrixWithMTD("Y", y, true); + + runTest(true, false, null, -1); + runRScript(true); + + //compare matrices + HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromHDFS("w"); + HashMap<CellIndex, Double> rfile = readRMatrixFromFS("w"); + TestUtils.compareMatrices(dmlfile, rfile, eps, "Stat-DML", "Stat-R"); + } + finally { + rtplatform = platformOld; + DMLScript.USE_LOCAL_SPARK_CONFIG = sparkConfigOld; + CompressedMatrixBlock.ALLOW_DDC_ENCODING = true; + } + } + + /** + * Override default configuration with custom test configuration to ensure + * scratch space and local temporary directory locations are also updated. + */ + @Override + protected File getConfigTemplateFile() { + // Instrumentation in this test's output log to show custom configuration file used for template. + System.out.println("This test case overrides default configuration with " + TEST_CONF_FILE.getPath()); + return TEST_CONF_FILE; + } +} \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/47405065/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java index ccb423f..170442c 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/compress/CompressedLinregCG.java @@ -62,32 +62,26 @@ public class CompressedLinregCG extends AutomatedTestBase } @Test - public void testGDFOLinregCGDenseCP() { - runGDFOTest(TEST_NAME1, false, ExecType.CP); + public void testLinregCGDenseCP() { + runLinregCGTest(TEST_NAME1, false, ExecType.CP); } @Test - public void testGDFOLinregCGSparseCP() { - 
runGDFOTest(TEST_NAME1, true, ExecType.CP); + public void testLinregCGSparseCP() { + runLinregCGTest(TEST_NAME1, true, ExecType.CP); } @Test - public void testGDFOLinregCGDenseSP() { - runGDFOTest(TEST_NAME1, false, ExecType.SPARK); + public void testLinregCGDenseSP() { + runLinregCGTest(TEST_NAME1, false, ExecType.SPARK); } @Test - public void testGDFOLinregCGSparseSP() { - runGDFOTest(TEST_NAME1, true, ExecType.SPARK); + public void testLinregCGSparseSP() { + runLinregCGTest(TEST_NAME1, true, ExecType.SPARK); } - /** - * - * @param sparseM1 - * @param sparseM2 - * @param instType - */ - private void runGDFOTest( String testname,boolean sparse, ExecType instType) + private void runLinregCGTest( String testname,boolean sparse, ExecType instType) { //rtplatform for MR RUNTIME_PLATFORM platformOld = rtplatform; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/47405065/src/test/scripts/functions/compress/L2SVM.R ---------------------------------------------------------------------- diff --git a/src/test/scripts/functions/compress/L2SVM.R b/src/test/scripts/functions/compress/L2SVM.R new file mode 100644 index 0000000..165ace8 --- /dev/null +++ b/src/test/scripts/functions/compress/L2SVM.R @@ -0,0 +1,108 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. 
See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +args <- commandArgs(TRUE) +library("Matrix") + +X = as.matrix(readMM(paste(args[1], "X.mtx", sep=""))); +Y = as.matrix(readMM(paste(args[1], "Y.mtx", sep=""))); +intercept = as.integer(args[2]); +epsilon = as.double(args[3]); +lambda = 0.001; +maxiterations = as.integer(args[4]); + +check_min = min(Y) +check_max = max(Y) +num_min = sum(Y == check_min) +num_max = sum(Y == check_max) +if(num_min + num_max != nrow(Y)){ + print("please check Y, it should contain only 2 labels") +}else{ + if(check_min != -1 | check_max != +1) + Y = 2/(check_max - check_min)*Y - (check_min + check_max)/(check_max - check_min) +} + +dimensions = ncol(X) + +if (intercept == 1) { + ones = matrix(1, rows=num_samples, cols=1) + X = cbind(X, ones); +} + +num_rows_in_w = dimensions +if(intercept == 1){ + num_rows_in_w = num_rows_in_w + 1 +} +w = matrix(0, num_rows_in_w, 1) + +g_old = t(X) %*% Y +s = g_old + +Xw = matrix(0,nrow(X),1) +iter = 0 +positive_label = check_max +negative_label = check_min + +continue = TRUE +while(continue && iter < maxiterations){ + t = 0 + Xd = X %*% s + wd = lambda * sum(w * s) + dd = lambda * sum(s * s) + continue1 = TRUE + while(continue1){ + tmp_Xw = Xw + t*Xd + out = 1 - Y * (tmp_Xw) + sv = which(out > 0) + g = wd + t*dd - sum(out[sv] * Y[sv] * Xd[sv]) + h = dd + sum(Xd[sv] * Xd[sv]) + t = t - g/h + continue1 = (g*g/h >= 1e-10) + } + + w = w + t*s + Xw = Xw + t*Xd + + out = 1 - Y * (X %*% w) + sv = which(out > 0) + obj = 0.5 * sum(out[sv] * out[sv]) + lambda/2 * sum(w * w) + g_new = t(X[sv,]) %*% (out[sv] * Y[sv]) - lambda * w + + print(paste("OBJ : ", obj)) + + continue = (t*sum(s * g_old) >= epsilon*obj) + + be = sum(g_new * g_new)/sum(g_old * g_old) + s = be * s + g_new + g_old = g_new + + iter = iter + 1 +} + +extra_model_params = matrix(0, 4, 1) +extra_model_params[1,1] = 
positive_label +extra_model_params[2,1] = negative_label +extra_model_params[3,1] = intercept +extra_model_params[4,1] = dimensions + +w = t(cbind(t(w), t(extra_model_params))) + +writeMM(as(w,"CsparseMatrix"), paste(args[5], "w", sep=""));