This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 9e99f3c4c3bec42299fa5e48a0cb3bc3aea264be Author: Matthias Boehm <[email protected]> AuthorDate: Mon May 20 20:15:35 2024 +0200 [SYSTEMDS-3696] New sliceLineDebug built-in function for usability This patch adds a new sliceLineDebug function to present the top-k worst slices returned from sliceLine (slicefinder) in a human-readable format. This is the output for the Salaries dataset: sliceLineDebug: -- Slice #1: score=0.4041683676825298, size=248.0 ---- avg error=6.558681888351787E8, max error=8.524558818262574E9 ---- predicate: "rank" = "Prof" AND "sex" = "Male" -- Slice #2: score=0.3731763935666855, size=42.0 ---- avg error=8.271958572009121E8, max error=4.553584116646141E9 ---- predicate: "rank" = "Prof" AND "yrs.since.phd" = 31.25 -- Slice #3: score=0.3675193573989536, size=125.0 ---- avg error=6.758211389786526E8, max error=8.524558818262574E9 ---- predicate: "rank" = "Prof" AND "discipline" = "B" AND "sex" = "Male" -- Slice #4: score=0.35652331744984933, size=266.0 ---- avg error=6.307265846260264E8, max error=8.524558818262574E9 ---- predicate: "rank" = "Prof" --- scripts/builtin/lmPredictStats.dml | 2 +- scripts/builtin/sliceLineDebug.dml | 68 ++ scripts/builtin/slicefinder.dml | 26 +- .../java/org/apache/sysds/common/Builtins.java | 3 +- .../builtin/part2/BuiltinIncSliceLineTest.java | 740 ++++++++++----------- .../part2/BuiltinSliceLineRealDataTest.java | 70 ++ ...ceFinderTest.java => BuiltinSliceLineTest.java} | 4 +- src/test/resources/datasets/Salaries_tfspec.json | 3 + .../functions/builtin/sliceLineRealData.dml | 53 ++ 9 files changed, 582 insertions(+), 387 deletions(-) diff --git a/scripts/builtin/lmPredictStats.dml b/scripts/builtin/lmPredictStats.dml index 9a2d57c892..f538a59591 100644 --- a/scripts/builtin/lmPredictStats.dml +++ b/scripts/builtin/lmPredictStats.dml @@ -52,7 +52,7 @@ m_lmPredictStats = function(Matrix[Double] yhat, Matrix[Double] ytest, Boolean l R2 = 1 - ss_res / (sum_sq_y_test - n * (sum_y_test/n)^2); else R2 = sum((yhat - 
mean_y_test)^2) / sum((ytest - mean_y_test)^2) - + avg_tot = sum_y_test / n; ss_tot = sum_sq_y_test; ss_avg_tot = ss_tot - n * avg_tot ^ 2; diff --git a/scripts/builtin/sliceLineDebug.dml b/scripts/builtin/sliceLineDebug.dml new file mode 100644 index 0000000000..d3ea1494e3 --- /dev/null +++ b/scripts/builtin/sliceLineDebug.dml @@ -0,0 +1,68 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. +# +#------------------------------------------------------------- + +# This builtin function takes the outputs of SliceLine and the +# original transformencode meta data in order to print a human- +# readable debug output of the resulting top-k slices. 
+# +# INPUT: +# ------------------------------------------------------------------------------ +# TK top-k slices (k x ncol(X) if successful) +# TKC score, size, error of slices (k x 3) +# tfmeta transformencode meta data +# tfspec transform specification +# ------------------------------------------------------------------------------ +# +# OUTPUT: +# ------------------------------------------------------------------------------ +# S debug output collected as a string +# ------------------------------------------------------------------------------ + +m_sliceLineDebug = function(Matrix[Double] TK, + Matrix[Double] TKC, Frame[Unknown] tfmeta, String tfspec) + return(Matrix[Double] S) +{ + # FIXME: frame toString always pads to 100 rows + # print("sliceLineDebug: input\n"+toString(TK)+"\n"+toString(TKC)+"\n"+toString(tfmeta)); + print("sliceLineDebug:"); + + # prepare essential decoding info + N = colnames(tfmeta); + TKsafe = TK + (TK==0); # for vectorized decoding + FTK = transformdecode(target=TKsafe, meta=tfmeta, spec=tfspec); + + # actual debug output + for(i in 1:nrow(TK)) { + TKi = TK[i,]; FTKi = FTK[i,]; + print("-- Slice #"+i+": score="+as.scalar(TKC[i,1])+", size="+as.scalar(TKC[i,4])); + print("---- avg error="+as.scalar(TKC[i,2]/TKC[i,4])+", max error="+as.scalar(TKC[i,3])); + pred = ""; + for(j in 1:ncol(TKi)) { + if( as.scalar(TKi[1,j]) != 0 ) { + tmp = as.scalar(N[1,j]) + " = " + as.scalar(FTK[i,j]); + pred = ifelse(pred=="", tmp, pred+" AND "+tmp); + } + } + print("---- predicate: "+pred); + } + S = TK; +} + diff --git a/scripts/builtin/slicefinder.dml b/scripts/builtin/slicefinder.dml index 0689280c12..cddbe7f808 100644 --- a/scripts/builtin/slicefinder.dml +++ b/scripts/builtin/slicefinder.dml @@ -44,19 +44,19 @@ # OUTPUT: # ----------------------------------------------------------------------------------------- # TK top-k slices (k x ncol(X) if successful) -# TKC score, size, error of slices (k x 3) +# TKC score, total/max error, size of slices (k 
x 4) # D debug matrix, populated with enumeration stats if verbose # ----------------------------------------------------------------------------------------- -m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4, - Int maxL = 0, Int minSup = 32, Double alpha = 0.5, Boolean tpEval = TRUE, +m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4, + Int maxL = 0, Int minSup = 32, Double alpha = 0.5, Boolean tpEval = TRUE, Int tpBlksz = 16, Boolean selFeat = FALSE, Boolean verbose = FALSE) return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D) { t1 = time(); # init debug matrix: levelID, enumerated S, valid S, TKmax, TKmin - D = matrix(0, 0, 5); + D = matrix(0, 0, 5); m = nrow(X); n = ncol(X); @@ -96,7 +96,7 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4, # enumerate candidate join pairs, incl size/error pruning nrS = nrow(S); - S = getPairedCandidates(S, R, TK, TKC, k, level, eAvg, minSup, alpha, n2, foffb, foffe); + S = getPairedCandidates(S, R, TK, TKC, k, level, eAvg, minSup, alpha, n2, foffb, foffe); S2 = S; if(selFeat) S2 = removeEmpty(target=S, margin="cols", select=t(selCols)); @@ -109,10 +109,10 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4, if( nrow(S) > 0 ) { # extract and evaluate candidate slices if( tpEval ) { # task-parallel - # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector + # hybrid task-parallel w/ 1 matrix-matrix for blocks of 16 matrix-vector R = matrix(0, nrow(S), 4) parfor( i in 1:ceil(nrow(S)/tpBlksz), check=0 ) { - beg = (i-1)*tpBlksz + 1; + beg = (i-1)*tpBlksz + 1; end = min(i*tpBlksz, nrow(R)); R[beg:end,] = evalSlice(X2, e, eAvg, t(S2[beg:end,]), level, alpha); } @@ -143,7 +143,7 @@ m_slicefinder = function(Matrix[Double] X, Matrix[Double] e, Int k = 4, } } -createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] e, +createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] e, Double eAvg, 
Double minSup, Double alpha, Boolean verbose) return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] selCols) { @@ -165,7 +165,7 @@ createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] e, sm = removeEmpty(target=merr, margin="rows", select=selCols); S = table(seq(1,nrow(attr)), attr, nrow(attr), n2); - # score 1-slices and create initial top-k + # score 1-slices and create initial top-k sc = score(ss, se, eAvg, alpha, nrow(X2)); R = cbind(sc, se, sm, ss); } @@ -290,7 +290,7 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, # and to void creating huge sparse intermediates [ID, M] = transformencode(target=as.frame(ID), spec="{ids:true,recode:[1]}") - # size pruning, with rowMin-rowMax transform + # size pruning, with rowMin-rowMax transform # to avoid densification (ignored zeros) map = table(ID, seq(1,nrow(P)), max(ID), nrow(P)) ubSizes = 1/rowMaxs(map * (1/t(ss))); @@ -304,13 +304,13 @@ getPairedCandidates = function(Matrix[Double] S, Matrix[Double] R, ubMError = replace(target=ubMError, pattern=Inf, replacement=0); ubScores = scoreUB(ubSizes, ubError, ubMError, eAvg, minSup, alpha, n2); [maxsc, minsc] = analyzeTopK(TKC); - fScores = (ubScores > minsc & ubScores > 0) + fScores = (ubScores > minsc & ubScores > 0) # missing parents pruning - numParents = rowSums((map %*% P12) != 0) + numParents = rowSums((map %*% P12) != 0) fParents = (numParents == level); - # apply all pruning + # apply all pruning fall = (fSizes & fScores & fParents); # deduplication of join outputs diff --git a/src/main/java/org/apache/sysds/common/Builtins.java b/src/main/java/org/apache/sysds/common/Builtins.java index de3f2b5c5a..9a70cd50db 100644 --- a/src/main/java/org/apache/sysds/common/Builtins.java +++ b/src/main/java/org/apache/sysds/common/Builtins.java @@ -301,7 +301,8 @@ public enum Builtins { SIGN("sign", false), SIN("sin", false), SINH("sinh", false), - SLICEFINDER("slicefinder", true), + SLICEFINDER("slicefinder", true), //TODO rename + 
SLICELINE_DEBUG("sliceLineDebug", true), SKEWNESS("skewness", true), SMAPE("smape", true), SMOTE("smote", true), diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java index 82b1a65265..5cfba68b65 100644 --- a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java +++ b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java @@ -31,373 +31,373 @@ import org.apache.sysds.test.TestConfiguration; import org.apache.sysds.test.TestUtils; public class BuiltinIncSliceLineTest extends AutomatedTestBase { - private static final String PREP_NAME = "slicefinderPrep"; - private static final String TEST_NAME = "incSliceLine"; - private static final String TEST_DIR = "functions/builtin/"; - private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinIncSliceLineTest.class.getSimpleName() + "/"; - private static final boolean VERBOSE = true; - - private static final double[][] EXPECTED_TOPK = new double[][] { - { 1.042, 69210699988.477, 11078019685.642, 18.000 }, - { 0.478, 92957580467.849, 11078019685.642, 39.000 }, - { 0.316, 40425449547.480, 11078019685.642, 10.000 }, - { 0.262, 67630559163.266, 7261504482.540, 29.000 }, - { 0.224, 202448990843.317, 11119010986.000, 125.000 }, - { 0.218, 68860581248.568, 7261504482.540, 31.000 }, - { 0.164, 206527445340.279, 11119010986.000, 135.000 }, - { 0.122, 68961886413.866, 7261504482.540, 34.000 }, - { 0.098, 360278523220.479, 11119010986.000, 266.000 }, - { 0.092, 73954209826.485, 11078019685.642, 39.000 } - }; - - @Override - public void setUp() { - addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "R" })); - } - - @Test - public void testTop4HybridDP() { - runIncSliceLineTest(4, "e", true, false, ExecMode.HYBRID); - } - - @Test - public void testTop4SinglenodeDP() { - 
runIncSliceLineTest(4, "e", true, false, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop4HybridTP() { - runIncSliceLineTest(4, "e", false, false, ExecMode.HYBRID); - } - - @Test - public void testTop4SinglenodeTP() { - runIncSliceLineTest(4, "e", false, false, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridDP() { - runIncSliceLineTest(10, "e", true, false, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeDP() { - runIncSliceLineTest(10, "e", true, false, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridTP() { - runIncSliceLineTest(10, "e", false, false, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeTP() { - runIncSliceLineTest(10, "e", false, false, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop4HybridDPSel() { - runIncSliceLineTest(4, "e", true, true, ExecMode.HYBRID); - } - - @Test - public void testTop4SinglenodeDPSel() { - runIncSliceLineTest(4, "e", true, true, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop4HybridTPSel() { - runIncSliceLineTest(4, "e", false, true, ExecMode.HYBRID); - } - - @Test - public void testTop4SinglenodeTPSel() { - runIncSliceLineTest(4, "e", false, true, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridDPSel() { - runIncSliceLineTest(10, "e", true, true, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeDPSel() { - runIncSliceLineTest(10, "e", true, true, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridTPSel() { - runIncSliceLineTest(10, "e", false, true, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeTPSel() { - runIncSliceLineTest(10, "e", false, true, ExecMode.SINGLE_NODE); - } - - @Test - public void testTop10HybridTPSelE2() { - runIncSliceLineTest(10, "oe", false, true, ExecMode.HYBRID); - } - - @Test - public void testTop10SinglenodeTPSelE2() { - runIncSliceLineTest(10, "oe", false, true, ExecMode.SINGLE_NODE); - } - - @Test - public void 
testIncSliceLineCustomInputs1() { - double[][] newX = { - { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 }, - { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 }, - { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 }, - { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 }, - { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 }, - { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 }, - { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 }, - { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 }, - { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 }, - { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 }, - { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 }, - { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 }, - { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 }, - { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 }, - { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 }, - { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 }, - { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 }, - { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 }, - { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 }, - { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 } - }; - double[][] e = { - { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 }, - { 0.344 }, - { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 }, - { 0.802 } - }; - int K = 10; - double[][] correctRes = { - { 0.307, 2.807, 0.878, 4.000 }, - { 0.307, 2.807, 0.878, 4.000 }, - { 0.282, 2.759, 0.987, 4.000 }, - { 0.157, 4.046, 0.987, 7.000 }, - { 0.127, 2.956, 0.878, 5.000 }, - { 0.122, 2.942, 0.878, 5.000 }, - { 0.074, 3.298, 0.987, 6.000 }, - { 0.064, 4.197, 0.878, 8.000 }, - { 0.061, 2.796, 0.987, 5.000 }, - { 0.038, 3.194, 0.878, 6.000 } - }; - testIncSliceLineCustomInputs(newX, e, K, correctRes); - } - - @Test - public void testIncSliceLineCustomInputs2() { - double[][] newX = { - { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 }, - { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 }, - { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 }, - { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 }, - { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 }, - { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 }, - { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 }, - { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 }, - { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 }, - { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 }, - { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 }, - { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 }, - { 2, 1, 
2, 2, 3, 1, 4, 3, 3, 4 }, - { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 }, - { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 }, - { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 }, - { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 }, - { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 }, - { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 }, - { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 } - }; - - double[][] e = { - { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 }, - { 0.443 }, - { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 }, - { 0.028 } - }; - int K = 10; - double[][] correctRes = { - { 0.410, 3.466, 0.931, 4.000 }, - { 0.410, 3.466, 0.931, 4.000 }, - { 0.111, 2.802, 0.897, 4.000 }, - { 0.075, 3.805, 0.951, 6.000 }, - { 0.057, 4.278, 0.897, 7.000 }, - { 0.047, 3.711, 0.931, 6.000 }, - { 0.035, 3.152, 0.897, 5.000 }, - { 0.032, 4.179, 0.897, 7.000 }, - { 0.023, 3.634, 0.931, 6.000 }, - { 0.013, 3.091, 0.931, 5.000 } - }; - - testIncSliceLineCustomInputs(newX, e, K, correctRes); - } - - @Test - public void testIncSliceLineCustomInputs3() { - double[][] newX = { - { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 }, - { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 }, - { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 }, - { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 }, - { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 }, - { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 }, - { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 }, - { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 }, - { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 }, - { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 }, - { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 }, - { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 }, - { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 }, - { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 }, - { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 }, - { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 }, - { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 }, - { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 }, - { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 }, - { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 }, - { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 }, - { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 }, - { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 }, - { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 }, - { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 }, - { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 }, 
- { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 }, - { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 }, - { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 }, - { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 }, - { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 }, - { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 }, - { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 }, - { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 }, - { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 }, - { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 }, - { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 }, - { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 }, - { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 }, - { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 } - }; - double[][] e = { - { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 }, - { 0.344 }, - { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 }, - { 0.802 }, - { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 }, - { 0.443 }, - { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 }, - { 0.028 } - }; - int K = 10; - double[][] correctRes = { - { 0.149, 4.300, 0.931, 6.000 }, - { 0.113, 3.138, 0.987, 4.000 }, - { 0.093, 4.644, 0.931, 7.000 }, - { 0.090, 4.630, 0.951, 7.000 }, - { 0.059, 8.002, 0.951, 14.000 }, - { 0.024, 2.954, 0.951, 4.000 }, - { 0.017, 3.415, 0.897, 5.000 }, - { 0.010, 3.398, 0.878, 5.000 }, - { 0.009, 2.923, 0.897, 4.000 }, - { 0.008, 3.391, 0.897, 5.000 } - }; - testIncSliceLineCustomInputs(newX, e, K, correctRes); - } - - // @Test - // public void testTop10SparkTP() { - // runIncSliceLineTest(10, false, ExecMode.SPARK); - // } - - private void runIncSliceLineTest(int K, String err, boolean dp, boolean selCols, ExecMode mode) { - ExecMode platformOld = setExecMode(mode); - loadTestConfiguration(getTestConfiguration(TEST_NAME)); - String HOME = SCRIPT_DIR + TEST_DIR; - String data = DATASET_DIR + "Salaries.csv"; - - try { - loadTestConfiguration(getTestConfiguration(TEST_NAME)); - - // run data preparation - fullDMLScriptName = HOME + PREP_NAME + ".dml"; - programArgs = new 
String[] { "-args", data, err, output("newX"), output("e") }; - runTest(true, false, null, -1); - - // read output and store for dml and R - double[][] newX = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("newX")); - double[][] e = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("e")); - writeInputMatrixWithMTD("newX", newX, true); - writeInputMatrixWithMTD("e", e, true); - - // execute main test - fullDMLScriptName = HOME + TEST_NAME + ".dml"; - programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), - String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), - String.valueOf(VERBOSE).toUpperCase(), output("R") }; - - runTest(true, false, null, -1); - - HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R"); - - // execute main test - fullDMLScriptName = HOME + "slicefinder" + ".dml"; - programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), - String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), - String.valueOf(VERBOSE).toUpperCase(), output("R") }; - - runTest(true, false, null, -1); - - HashMap<CellIndex, Double> dmlfile2 = readDMLMatrixFromOutputDir("R"); - - TestUtils.compareMatrices(dmlfile, dmlfile2, 1e-2, "Stat-IncSliceLine", "Stat-Slicefinder"); - - // compare expected results - if (err.equals("e")) { - double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile); - if (mode != ExecMode.SPARK) // TODO why only CP correct, but R always matches? test framework? 
- for (int i = 0; i < K; i++) - TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2); - } - - // ensure proper inlining, despite initially multiple calls and large function - Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); - } finally { - rtplatform = platformOld; - } - } - - public void testIncSliceLineCustomInputs(double[][] newX, double[][] e, int K, double[][] correctRes) { - boolean dp = true, selCols = false; - ExecMode mode = ExecMode.SINGLE_NODE; - ExecMode platformOld = setExecMode(mode); - loadTestConfiguration(getTestConfiguration(TEST_NAME)); - String HOME = SCRIPT_DIR + TEST_DIR; - - try { - loadTestConfiguration(getTestConfiguration(TEST_NAME)); - - writeInputMatrixWithMTD("newX", newX, false); - writeInputMatrixWithMTD("e", e, false); - - fullDMLScriptName = HOME + TEST_NAME + ".dml"; - programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), - String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), - String.valueOf(VERBOSE).toUpperCase(), output("R") }; - - runTest(true, false, null, -1); - - HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R"); - double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile); - TestUtils.compareMatrices(correctRes, ret, 1e-2); - - Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); - } finally { - rtplatform = platformOld; - } - } -} \ No newline at end of file + private static final String PREP_NAME = "slicefinderPrep"; + private static final String TEST_NAME = "incSliceLine"; + private static final String TEST_DIR = "functions/builtin/"; + private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinIncSliceLineTest.class.getSimpleName() + "/"; + private static final boolean VERBOSE = true; + + private static final double[][] EXPECTED_TOPK = new double[][] { + { 1.042, 69210699988.477, 11078019685.642, 18.000 }, + { 0.478, 92957580467.849, 11078019685.642, 39.000 }, + { 0.316, 40425449547.480, 11078019685.642, 10.000 
}, + { 0.262, 67630559163.266, 7261504482.540, 29.000 }, + { 0.224, 202448990843.317, 11119010986.000, 125.000 }, + { 0.218, 68860581248.568, 7261504482.540, 31.000 }, + { 0.164, 206527445340.279, 11119010986.000, 135.000 }, + { 0.122, 68961886413.866, 7261504482.540, 34.000 }, + { 0.098, 360278523220.479, 11119010986.000, 266.000 }, + { 0.092, 73954209826.485, 11078019685.642, 39.000 } + }; + + @Override + public void setUp() { + addTestConfiguration(TEST_NAME, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] { "R" })); + } + + @Test + public void testTop4HybridDP() { + runIncSliceLineTest(4, "e", true, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDP() { + runIncSliceLineTest(4, "e", true, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTP() { + runIncSliceLineTest(4, "e", false, false, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTP() { + runIncSliceLineTest(4, "e", false, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDP() { + runIncSliceLineTest(10, "e", true, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDP() { + runIncSliceLineTest(10, "e", true, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTP() { + runIncSliceLineTest(10, "e", false, false, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTP() { + runIncSliceLineTest(10, "e", false, false, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridDPSel() { + runIncSliceLineTest(4, "e", true, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeDPSel() { + runIncSliceLineTest(4, "e", true, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop4HybridTPSel() { + runIncSliceLineTest(4, "e", false, true, ExecMode.HYBRID); + } + + @Test + public void testTop4SinglenodeTPSel() { + runIncSliceLineTest(4, "e", false, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridDPSel() { + 
runIncSliceLineTest(10, "e", true, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeDPSel() { + runIncSliceLineTest(10, "e", true, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSel() { + runIncSliceLineTest(10, "e", false, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSel() { + runIncSliceLineTest(10, "e", false, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testTop10HybridTPSelE2() { + runIncSliceLineTest(10, "oe", false, true, ExecMode.HYBRID); + } + + @Test + public void testTop10SinglenodeTPSelE2() { + runIncSliceLineTest(10, "oe", false, true, ExecMode.SINGLE_NODE); + } + + @Test + public void testIncSliceLineCustomInputs1() { + double[][] newX = { + { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 }, + { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 }, + { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 }, + { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 }, + { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 }, + { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 }, + { 2, 2, 2, 4, 3, 3, 2, 2, 1, 2 }, + { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 }, + { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 }, + { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 }, + { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 }, + { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 }, + { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 }, + { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 }, + { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 }, + { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 }, + { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 } + }; + double[][] e = { + { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 }, + { 0.344 }, + { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 }, + { 0.802 } + }; + int K = 10; + double[][] correctRes = { + { 0.307, 2.807, 0.878, 4.000 }, + { 0.307, 2.807, 0.878, 4.000 }, + { 0.282, 2.759, 0.987, 4.000 }, + { 0.157, 4.046, 0.987, 7.000 }, + { 0.127, 2.956, 0.878, 5.000 }, + { 0.122, 2.942, 0.878, 5.000 }, + { 0.074, 3.298, 0.987, 
6.000 }, + { 0.064, 4.197, 0.878, 8.000 }, + { 0.061, 2.796, 0.987, 5.000 }, + { 0.038, 3.194, 0.878, 6.000 } + }; + testIncSliceLineCustomInputs(newX, e, K, correctRes); + } + + @Test + public void testIncSliceLineCustomInputs2() { + double[][] newX = { + { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 }, + { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 }, + { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 }, + { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 }, + { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 }, + { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 }, + { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 }, + { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 }, + { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 }, + { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 }, + { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 }, + { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 }, + { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 }, + { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 }, + { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 }, + { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 }, + { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 }, + { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 }, + { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 } + }; + + double[][] e = { + { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 }, + { 0.443 }, + { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 }, + { 0.028 } + }; + int K = 10; + double[][] correctRes = { + { 0.410, 3.466, 0.931, 4.000 }, + { 0.410, 3.466, 0.931, 4.000 }, + { 0.111, 2.802, 0.897, 4.000 }, + { 0.075, 3.805, 0.951, 6.000 }, + { 0.057, 4.278, 0.897, 7.000 }, + { 0.047, 3.711, 0.931, 6.000 }, + { 0.035, 3.152, 0.897, 5.000 }, + { 0.032, 4.179, 0.897, 7.000 }, + { 0.023, 3.634, 0.931, 6.000 }, + { 0.013, 3.091, 0.931, 5.000 } + }; + + testIncSliceLineCustomInputs(newX, e, K, correctRes); + } + + @Test + public void testIncSliceLineCustomInputs3() { + double[][] newX = { + { 2, 1, 1, 2, 3, 2, 3, 3, 1, 2 }, + { 2, 2, 2, 3, 4, 1, 2, 1, 3, 2 }, + { 2, 1, 3, 3, 2, 2, 3, 1, 1, 4 }, + { 1, 2, 2, 1, 3, 2, 3, 2, 2, 3 }, + { 3, 2, 3, 4, 3, 3, 4, 1, 1, 3 }, + { 4, 3, 2, 3, 4, 4, 3, 4, 1, 1 }, + { 2, 2, 2, 4, 3, 3, 
2, 2, 1, 2 }, + { 1, 1, 2, 2, 3, 3, 2, 1, 1, 2 }, + { 4, 3, 2, 1, 3, 2, 4, 2, 4, 3 }, + { 1, 3, 1, 4, 1, 3, 3, 2, 3, 2 }, + { 2, 4, 3, 1, 2, 4, 1, 3, 2, 4 }, + { 3, 2, 4, 3, 1, 4, 2, 3, 4, 1 }, + { 4, 1, 2, 4, 3, 1, 4, 2, 1, 3 }, + { 1, 3, 4, 2, 4, 3, 1, 4, 2, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 4, 2 }, + { 3, 2, 4, 1, 3, 4, 2, 3, 1, 4 }, + { 4, 1, 3, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 3, 2, 4, 1, 3, 4, 2, 4, 3 }, + { 2, 4, 1, 3, 2, 4, 3, 1, 2, 4 }, + { 2, 3, 3, 2, 1, 4, 2, 3, 2, 3 }, + { 2, 1, 1, 1, 3, 4, 2, 2, 1, 2 }, + { 3, 3, 3, 2, 1, 2, 3, 1, 4, 2 }, + { 3, 2, 3, 1, 1, 1, 4, 3, 4, 2 }, + { 1, 3, 2, 3, 2, 3, 2, 1, 2, 1 }, + { 4, 3, 1, 1, 1, 1, 1, 1, 3, 2 }, + { 2, 2, 3, 3, 2, 2, 2, 3, 4, 1 }, + { 3, 2, 2, 2, 4, 4, 2, 4, 1, 1 }, + { 1, 3, 3, 2, 1, 3, 1, 2, 4, 4 }, + { 2, 1, 2, 2, 3, 1, 2, 3, 2, 1 }, + { 4, 1, 3, 4, 1, 4, 2, 3, 4, 4 }, + { 4, 2, 4, 4, 2, 1, 2, 1, 1, 4 }, + { 4, 1, 1, 4, 1, 4, 3, 2, 4, 2 }, + { 2, 1, 2, 2, 3, 1, 4, 3, 3, 4 }, + { 4, 1, 3, 1, 3, 1, 2, 1, 3, 3 }, + { 2, 1, 3, 1, 1, 3, 1, 2, 1, 2 }, + { 1, 3, 4, 3, 1, 2, 2, 2, 1, 1 }, + { 2, 4, 4, 3, 4, 1, 2, 1, 2, 4 }, + { 3, 3, 3, 3, 3, 1, 2, 3, 4, 4 }, + { 3, 2, 2, 2, 4, 1, 4, 2, 3, 1 }, + { 1, 2, 3, 2, 4, 3, 2, 3, 2, 3 } + }; + double[][] e = { + { 0.159 }, { 0.588 }, { 0.414 }, { 0.305 }, { 0.193 }, { 0.195 }, { 0.878 }, { 0.149 }, { 0.835 }, + { 0.344 }, + { 0.123 }, { 0.456 }, { 0.789 }, { 0.987 }, { 0.654 }, { 0.321 }, { 0.246 }, { 0.135 }, { 0.579 }, + { 0.802 }, + { 0.591 }, { 0.858 }, { 0.144 }, { 0.350 }, { 0.931 }, { 0.951 }, { 0.788 }, { 0.491 }, { 0.358 }, + { 0.443 }, + { 0.231 }, { 0.564 }, { 0.897 }, { 0.879 }, { 0.546 }, { 0.132 }, { 0.462 }, { 0.153 }, { 0.759 }, + { 0.028 } + }; + int K = 10; + double[][] correctRes = { + { 0.149, 4.300, 0.931, 6.000 }, + { 0.113, 3.138, 0.987, 4.000 }, + { 0.093, 4.644, 0.931, 7.000 }, + { 0.090, 4.630, 0.951, 7.000 }, + { 0.059, 8.002, 0.951, 14.000 }, + { 0.024, 2.954, 0.951, 4.000 }, + { 0.017, 3.415, 0.897, 5.000 }, + { 0.010, 3.398, 0.878, 5.000 }, 
+ { 0.009, 2.923, 0.897, 4.000 }, + { 0.008, 3.391, 0.897, 5.000 } + }; + testIncSliceLineCustomInputs(newX, e, K, correctRes); + } + + // @Test + // public void testTop10SparkTP() { + // runIncSliceLineTest(10, false, ExecMode.SPARK); + // } + + private void runIncSliceLineTest(int K, String err, boolean dp, boolean selCols, ExecMode mode) { + ExecMode platformOld = setExecMode(mode); + loadTestConfiguration(getTestConfiguration(TEST_NAME)); + String HOME = SCRIPT_DIR + TEST_DIR; + String data = DATASET_DIR + "Salaries.csv"; + + try { + loadTestConfiguration(getTestConfiguration(TEST_NAME)); + + // run data preparation + fullDMLScriptName = HOME + PREP_NAME + ".dml"; + programArgs = new String[] { "-args", data, err, output("newX"), output("e") }; + runTest(true, false, null, -1); + + // read output and store for dml and R + double[][] newX = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("newX")); + double[][] e = TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("e")); + writeInputMatrixWithMTD("newX", newX, true); + writeInputMatrixWithMTD("e", e, true); + + // execute main test + fullDMLScriptName = HOME + TEST_NAME + ".dml"; + programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), + String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), + String.valueOf(VERBOSE).toUpperCase(), output("R") }; + + runTest(true, false, null, -1); + + HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R"); + + // execute main test + fullDMLScriptName = HOME + "slicefinder" + ".dml"; + programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), + String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), + String.valueOf(VERBOSE).toUpperCase(), output("R") }; + + runTest(true, false, null, -1); + + HashMap<CellIndex, Double> dmlfile2 = readDMLMatrixFromOutputDir("R"); + + TestUtils.compareMatrices(dmlfile, dmlfile2, 1e-2, "Stat-IncSliceLine", 
"Stat-Slicefinder"); + + // compare expected results + if (err.equals("e")) { + double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile); + if (mode != ExecMode.SPARK) // TODO why only CP correct, but R always matches? test framework? + for (int i = 0; i < K; i++) + TestUtils.compareMatrices(EXPECTED_TOPK[i], ret[i], 1e-2); + } + + // ensure proper inlining, despite initially multiple calls and large function + Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); + } finally { + rtplatform = platformOld; + } + } + + public void testIncSliceLineCustomInputs(double[][] newX, double[][] e, int K, double[][] correctRes) { + boolean dp = true, selCols = false; + ExecMode mode = ExecMode.SINGLE_NODE; + ExecMode platformOld = setExecMode(mode); + loadTestConfiguration(getTestConfiguration(TEST_NAME)); + String HOME = SCRIPT_DIR + TEST_DIR; + + try { + loadTestConfiguration(getTestConfiguration(TEST_NAME)); + + writeInputMatrixWithMTD("newX", newX, false); + writeInputMatrixWithMTD("e", e, false); + + fullDMLScriptName = HOME + TEST_NAME + ".dml"; + programArgs = new String[] { "-args", input("newX"), input("e"), String.valueOf(K), + String.valueOf(!dp).toUpperCase(), String.valueOf(selCols).toUpperCase(), + String.valueOf(VERBOSE).toUpperCase(), output("R") }; + + runTest(true, false, null, -1); + + HashMap<CellIndex, Double> dmlfile = readDMLMatrixFromOutputDir("R"); + double[][] ret = TestUtils.convertHashMapToDoubleArray(dmlfile); + TestUtils.compareMatrices(correctRes, ret, 1e-2); + + Assert.assertFalse(heavyHittersContainsSubString("evalSlice")); + } finally { + rtplatform = platformOld; + } + } +} diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinSliceLineRealDataTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinSliceLineRealDataTest.java new file mode 100644 index 0000000000..7dff753a60 --- /dev/null +++ 
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinSliceLineRealDataTest.java @@ -0,0 +1,70 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.test.functions.builtin.part2; + +import org.apache.sysds.common.Types; +import org.apache.sysds.common.Types.ExecType; +import org.apache.sysds.runtime.matrix.data.MatrixValue.CellIndex; +import org.apache.sysds.test.AutomatedTestBase; +import org.apache.sysds.test.TestConfiguration; +import org.apache.sysds.utils.Statistics; +import org.junit.Assert; +import org.junit.Test; + +public class BuiltinSliceLineRealDataTest extends AutomatedTestBase { + private final static String TEST_NAME = "sliceLineRealData"; + private final static String TEST_DIR = "functions/builtin/"; + private final static String TEST_CLASS_DIR = TEST_DIR + BuiltinSliceLineRealDataTest.class.getSimpleName() + "/"; + + private final static String SALARIES_DATA = DATASET_DIR + "Salaries.csv"; + private final static String SALARIES_TFSPEC = DATASET_DIR + "Salaries_tfspec.json"; + + @Override + public void setUp() { + for(int i=1; i<=4; i++) + addTestConfiguration(TEST_NAME+i, new TestConfiguration(TEST_CLASS_DIR, TEST_NAME, new String[] {"R"})); + } + + 
@Test + public void testSliceLineSalaries() { + runSliceLine(1, SALARIES_DATA, SALARIES_TFSPEC, 0.5, ExecType.CP); + } + + private void runSliceLine(int test, String data, String tfspec, double minAcc, ExecType instType) { + Types.ExecMode platformOld = setExecMode(instType); + try { + loadTestConfiguration(getTestConfiguration(TEST_NAME+test)); + + String HOME = SCRIPT_DIR + TEST_DIR; + fullDMLScriptName = HOME + TEST_NAME + ".dml"; + programArgs = new String[] {"-stats", + "-args", data, tfspec, output("R")}; + + runTest(true, false, null, -1); + + double acc = readDMLMatrixFromOutputDir("R").get(new CellIndex(1,1)); + Assert.assertTrue(acc >= minAcc); + Assert.assertEquals(0, Statistics.getNoOfExecutedSPInst()); + } + finally { + rtplatform = platformOld; + } + } +} diff --git a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinSliceFinderTest.java b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinSliceLineTest.java similarity index 99% rename from src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinSliceFinderTest.java rename to src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinSliceLineTest.java index d30cd1a240..cef4783931 100644 --- a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinSliceFinderTest.java +++ b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinSliceLineTest.java @@ -31,12 +31,12 @@ import org.apache.sysds.test.AutomatedTestBase; import org.apache.sysds.test.TestConfiguration; import org.apache.sysds.test.TestUtils; -public class BuiltinSliceFinderTest extends AutomatedTestBase +public class BuiltinSliceLineTest extends AutomatedTestBase { private static final String PREP_NAME = "slicefinderPrep"; private static final String TEST_NAME = "slicefinder"; private static final String TEST_DIR = "functions/builtin/"; - private static final String TEST_CLASS_DIR = TEST_DIR + BuiltinSliceFinderTest.class.getSimpleName() + "/"; + private static final 
String TEST_CLASS_DIR = TEST_DIR + BuiltinSliceLineTest.class.getSimpleName() + "/"; private static final boolean VERBOSE = true; private static final double[][] EXPECTED_TOPK = new double[][]{ diff --git a/src/test/resources/datasets/Salaries_tfspec.json b/src/test/resources/datasets/Salaries_tfspec.json new file mode 100644 index 0000000000..82728277ba --- /dev/null +++ b/src/test/resources/datasets/Salaries_tfspec.json @@ -0,0 +1,3 @@ +{ ids:true, +recode:[1,2,3,6], +bin:[{id:4, method:equi-width, numbins:10},{id:5, method:equi-width, numbins:10}]} diff --git a/src/test/scripts/functions/builtin/sliceLineRealData.dml b/src/test/scripts/functions/builtin/sliceLineRealData.dml new file mode 100644 index 0000000000..46c4bb368b --- /dev/null +++ b/src/test/scripts/functions/builtin/sliceLineRealData.dml @@ -0,0 +1,53 @@ +#------------------------------------------------------------- +# +# Licensed to the Apache Software Foundation (ASF) under one +# or more contributor license agreements. See the NOTICE file +# distributed with this work for additional information +# regarding copyright ownership. The ASF licenses this file +# to you under the Apache License, Version 2.0 (the +# "License"); you may not use this file except in compliance +# with the License. You may obtain a copy of the License at +# +# http://www.apache.org/licenses/LICENSE-2.0 +# +# Unless required by applicable law or agreed to in writing, +# software distributed under the License is distributed on an +# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY +# KIND, either express or implied. See the License for the +# specific language governing permissions and limitations +# under the License. 
+# +#------------------------------------------------------------- + +Forig = read($1, data_type="frame", format="csv", header=TRUE); +F = Forig[,1:ncol(Forig)-1]; +y = as.matrix(Forig[,ncol(Forig)]); +tfspec = read($2, data_type="scalar", value_type="string"); +[X, meta] = transformencode(target=F, spec=tfspec); + +meta = meta[,2:ncol(X)] +X = X[,2:ncol(X)] + +# one hot encoding +m = nrow(X) +n = ncol(X) +fdom = colMaxs(X); +foffb = t(cumsum(t(fdom))) - fdom; +foffe = t(cumsum(t(fdom))) +rix = matrix(seq(1,m)%*%matrix(1,1,n), m*n, 1) +cix = matrix(X + foffb, m*n, 1); +X2 = table(rix, cix); #one-hot encoded + +# learn model +B = lm(X=X2, y=y, icpt=2, reg=0.001, verbose=FALSE); +yhat = lmPredict(X=X2, B=B, ytest=y, icpt=1, verbose=FALSE); +acc = lmPredictStats(yhat, y, TRUE); +e = (y-yhat)^2; + +# model debugging via sliceline +[TK,TKC,D] = slicefinder(X=X, e=e, k=4, alpha=0.95, minSup=32, tpBlksz=16, verbose=TRUE) +tfspec2 = "{ ids:true, recode:[1,2,5], bin:[{id:3, method:equi-width, numbins:10},{id:4, method:equi-width, numbins:10}]}" +XYZ = sliceLineDebug(TK=TK, TKC=TKC, tfmeta=meta, tfspec=tfspec2) + +acc = acc[3,1]; +write(acc, $3);
