This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 4b2a3ca782 [SYSTEMDS-3696] Improved incremental slice-line buitin
4b2a3ca782 is described below
commit 4b2a3ca7823599f19be23aa41038e658cdd0ff4e
Author: Frederic Zoepffel <[email protected]>
AuthorDate: Mon Aug 26 11:43:39 2024 +0200
[SYSTEMDS-3696] Improved incremental slice-line buitin
Closes #2063.
---
scripts/builtin/incSliceLine.dml | 528 +++++++++++++--------
.../builtin/part2/BuiltinIncSliceLineTest.java | 369 ++++++++++----
.../scripts/functions/builtin/incSliceLine.dml | 3 +-
.../scripts/functions/builtin/incSliceLineFull.dml | 54 ++-
4 files changed, 671 insertions(+), 283 deletions(-)
diff --git a/scripts/builtin/incSliceLine.dml b/scripts/builtin/incSliceLine.dml
index 97232d990b..f9c34127d0 100644
--- a/scripts/builtin/incSliceLine.dml
+++ b/scripts/builtin/incSliceLine.dml
@@ -19,63 +19,87 @@
#
#-------------------------------------------------------------
-# This builtin function implements SliceLine, a linear-algebra-based
+# This builtin function implements incSliceLine, a linear-algebra-based
# ML model debugging technique for finding the top-k data slices where
# a trained models performs significantly worse than on the overall
-# dataset. For a detailed description and experimental results, see:
+# dataset. IncSliceLine is designed for scenarios in which training data is
updated incrementally.
+# For a detailed description of the SliceLine algorithm and experimental
results, see:
# Svetlana Sagadeeva, Matthias Boehm: SliceLine: Fast, Linear-Algebra-based
Slice Finding for ML Model Debugging.(SIGMOD 2021)
#
# INPUT:
#
---------------------------------------------------------------------------------------
-# addedX Feature matrix of added tuples in recoded/binned representation
-# oldX All-comprising feature matrix of previous runs (except for
current run) in recoded/binned representation
-# oldE All-comprising error vector of trained model for old tuples
-# newE Error vector of trained model for added tuples
-# k Number of subsets required
-# maxL maximum level L (conjunctions of L predicates), 0 unlimited
-# minSup minimum support (min number of rows per slice)
-# alpha weight [0,1]: 0 only size, 1 only error
-# tpEval flag for task-parallel slice evaluation,
-# otherwise data-parallel
-# tpBlksz block size for task-parallel execution (num slices)
-# selFeat flag for removing one-hot-encoded features that don't satisfy
-# the initial minimum-support constraint and/or have zero error
-# verbose flag for verbose debug output
-# prevLattice previous lattice (for incremental updates)
-# prevRL previous statistics whole lattice (for incremental updates)
-# prevTK previous top-k slices (for incremental updates)
-# prevTKC previous top-k scores (for incremental updates)
+# addedX Feature matrix of added tuples in recoded/binned
representation
+# oldX All-comprising feature matrix of previous runs (except for
current run) in recoded/binned representation
+# oldE All-comprising error vector of trained model for old tuples
+# addedE Error vector of trained model for added tuples
+# indicesRemoved Indices of tuples that were removed from the previous
dataset (oldX)
+# k Number of subsets required
+# maxL maximum level L (conjunctions of L predicates), 0 unlimited
+# minSup minimum support (min number of rows per slice)
+# alpha weight [0,1]: 0 only size, 1 only error
+# tpEval flag for task-parallel slice evaluation,
+# otherwise data-parallel
+# tpBlksz block size for task-parallel execution (num slices)
+# selFeat flag for removing one-hot-encoded features that don't
satisfy
+# the initial minimum-support constraint and/or have zero
error
+# verbose flag for verbose debug output
+# params list of parameters to ensure consistent parameters through
all runs (for incremental updates)
+# prevFoffb previous feature offsets (for incremental updates)
+# prevFoffe previous feature offsets (for incremental updates)
+# prevLattice previous lattice (for incremental updates)
+# metaPrevLattice previous meta information for lattice encoding (for
incremental updates)
+# prevStats previous statistics whole lattice (for incremental updates)
+# prevTK previous top-k slices (for incremental updates)
+# prevTKC previous top-k scores (for incremental updates)
+# encodeLat flag for encoding output lattice for less memory
consumption
#
---------------------------------------------------------------------------------------
#
# OUTPUT:
#
-----------------------------------------------------------------------------------------
-# TK top-k slices (k x ncol(newX) if successful)
-# TKC score, size, error of slices (k x 3)
-# D debug matrix, populated with enumeration stats if verbose
-# L lattice matrix
-# RL statistics matrix for all slices in L
-# Xout feature matrix consisting of oldX and newX for next run
-# eOut error vector consisting of oldE and newE for next run
+# TK top-k slices (k x ncol(totalX) if successful)
+# TKC score, size, error of slices (k x 3)
+# D debug matrix, populated with enumeration stats if verbose
+# L lattice matrix
+# metaLattice meta information for lattice encoding
+# Stats statistics matrix for all slices in L
+# Xout feature matrix consisting of oldX, addedX and without
removedX for next run
+# eOut error vector consisting of oldE, addedE and without
removedE for next run
+# foffb feature offsets for next run
+# foffe feature offsets for next run
+# params list of parameters for next run
#
-----------------------------------------------------------------------------------------
m_incSliceLine = function(
- Matrix[Double] addedX, Matrix[Double] oldX = matrix(0, 0, 0),
Matrix[Double] oldE = matrix(0, 0, 0),
- Matrix[Double] newE, Int k = 4, Int maxL = 0, Int minSup = 32, Double
alpha = 0.5, Boolean tpEval = TRUE,
- Int tpBlksz = 16, Boolean selFeat = FALSE, Boolean verbose = FALSE,
list[unknown] params = list(),
- Matrix[Double] prevLattice = matrix(0, 0, 0), list[unknown] prevRL =
list(), Matrix[Double] prevTK = matrix(0,0,0),
- Matrix[Double] prevTKC = matrix(0,0,0))
- return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D,
Matrix[Double] L,
- list[unknown] RL, Matrix[Double] Xout, Matrix[Double] eOut, list[unknown]
params)
+ Matrix[Double] addedX, Matrix[Double] oldX = matrix(0, 0, 0),
+ Matrix[Double] oldE = matrix(0, 0, 0), Matrix[Double] addedE,
+ Int k = 4, Int maxL = 0, Int minSup = 32, Double alpha = 0.5,
+ Boolean tpEval = TRUE, Int tpBlksz = 16, Boolean selFeat = FALSE,
+ Matrix[Double] indicesRemoved = matrix(0,0,0),
+ Boolean verbose = FALSE, list[unknown] params = list(),
+ Matrix[Double] prevFoffb = matrix(0,0,0), Matrix[Double] prevFoffe =
matrix(0,0,0),
+ list[unknown] prevLattice = list(), list[unknown] metaPrevLattice =
list(),
+ list[unknown] prevStats = list(), Matrix[Double] prevTK = matrix(0,0,0),
+ Matrix[Double] prevTKC = matrix(0,0,0), Boolean encodeLat = TRUE)
+ return(
+ Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D,
+ list[unknown] L, list[unknown] metaLattice,
+ list[unknown] Stats, Matrix[Double] Xout, Matrix[Double] eOut,
+ Matrix[Double] foffb, Matrix[Double] foffe, list[unknown] params)
{
- # TODO convert input/output of previous enumerated slices to lists
- # for simple collection and processing
-
- if(nrow(prevLattice) > 0 & length(params) == 0){
- [TK, TKC, D, L, RL, Xout, eOut, params] = throwNoParamsError();
+ # for incremental updates a params list storing previous parameters is
required
+ # to ensure consistent parameters over all runs
+ # the params list is automatically generated from the first run's params and
only needs to be passed on.
+ if(length(prevLattice) > 0 &
+ ((length(prevStats) == 0 | length(params) == 0 | nrow(prevFoffb) == 0 |
nrow(prevFoffe) == 0 |
+ nrow(oldX) == 0 | nrow(oldE) == 0) | (encodeLat &
length(metaPrevLattice) == 0)))
+ {
+ [TK, TKC, D, L, Stats, Xout, eOut, foffb, foffe, metaLattice, params] =
throwNoParamsError();
} else {
+
t1 = time();
+
# store params for next run
- [params, k, maxL, minSup, alpha, tpEval, tpBlksz, selFeat] = storeParams(k,
maxL, minSup, alpha, tpEval, tpBlksz, selFeat, params);
+ [params, k, maxL, minSup, alpha, tpEval, tpBlksz, selFeat, encodeLat] =
storeParams(k, maxL, minSup, alpha, tpEval, tpBlksz, selFeat, encodeLat,
params);
# init debug matrix: levelID, enumerated S, valid S, TKmax, TKmin
D = matrix(0, 0, 5);
@@ -85,27 +109,43 @@ m_incSliceLine = function(
oldX = matrix(0,0,ncol(addedX));
}
if(nrow(oldE) == 0) {
- oldE = matrix(0,0,ncol(newE));
+ oldE = matrix(0,0,ncol(addedE));
}
- newX = rbind(oldX, addedX);
- totalE = rbind(oldE, newE);
+
+ removedTuples = matrix(0,0,ncol(oldX));
+ if(length(indicesRemoved) > 0 & nrow(oldX) > 0){
+ ## remove all rows from oldX and oldE that are in indicesRemoved
+ [oldX, removedTuples] = removeRowsByIndices( oldX, indicesRemoved);
+ [oldE, removedE] = removeRowsByIndices( oldE, indicesRemoved);
+ if(verbose){
+ print("incSliceLine: removed "+nrow(indicesRemoved)+" tuples.");
+ }
+ }
+ totalX = rbind(oldX, addedX);
+ totalE = rbind(oldE, addedE);
# prepare output error vector for next run
eOut = totalE;
# compute number of tuples m and number of features n
- m = nrow(newX);
- n = ncol(newX);
+ m = nrow(totalX);
+ n = ncol(totalX);
- # prepare offset vectors and one-hot encoded newX
- fdom = colMaxs(newX);
+ # prepare offset vectors and one-hot encoded totalX
+ fdom = colMaxs(totalX);
foffb = t(cumsum(t(fdom))) - fdom;
foffe = t(cumsum(t(fdom)));
rix = matrix(seq(1,m)%*%matrix(1,1,n), m*n, 1)
- cix = matrix(newX + foffb, m*n, 1);
+ cix = matrix(totalX + foffb, m*n, 1);
X2 = table(rix, cix, 1, m, as.scalar(foffe[,n]), FALSE); #one-hot encoded
+
+ #inititalize booleans
+ differentOffsets = TRUE;
+ if(nrow(prevFoffb) > 0){
+ differentOffsets = (!((sum(prevFoffb == foffb) == (ncol(foffb) *
nrow(foffb))) & (sum(prevFoffe == foffe) == (ncol(foffe) * nrow(foffe))) ));
+ }
- # One-hot encoding of addedX and oldX
+ # combining of addedX and oldX
if(nrow(oldX) > 0){
oldX2 = X2[1:nrow(oldX),];
addedX2 = X2[(nrow(oldX)+1):nrow(X2),];
@@ -113,97 +153,108 @@ m_incSliceLine = function(
oldX2 = matrix(0,0,ncol(X2));
addedX2 = X2;
}
-
- # One-hot encoding of prevTK and prevLattice
+
+ # One-hot encoding of tuples that were removed from oldX
+ # combining of addedX and oldX in changedX2 facilitates simple determination
of unchanged slices for pruning
+ if(nrow(removedTuples) > 0){
+ removedTuples2 = oneHotEncodeUsingOffsets(removedTuples, foffb, foffe);
+ changedX2 = rbind(addedX2, removedTuples2);
+ }else {
+ changedX2 = addedX2;
+ }
+
+ # One-hot encoding of prevTK
if( length(prevTK) > 0 ) {
prevTK2 = oneHotEncodeUsingOffsets(prevTK, foffb, foffe);
}else{
prevTK2 = prevTK;
}
- if(length(prevLattice) > 0) {
- prevLattice2 = oneHotEncodeUsingOffsets(prevLattice, foffb, foffe);
- }else{
- prevLattice2 = prevLattice;
- }
-
- # compute first indices for each level for prevLattice
- levelIndices = list();
- levelIndices = append(levelIndices, 1);
- if(length(prevRL) > 1) {
- for( i in 1: length(prevRL)) {
- levelIndices = append(levelIndices, as.scalar(levelIndices[i]) +
nrow(as.matrix(prevRL[i])));
- }
- }
-
- # generate list of unchanged slices for each level (beginning at 2) in
prevLattice
- unchangedS = list();
- unchangedR = list();
- if(nrow(oldX) > 0 ){
- [unchangedS, unchangedR] = determineUnchangedSlices( prevRL, prevLattice2,
addedX2, levelIndices, unchangedS, unchangedR);
- }
# initialize statistics and basic slices
n2 = ncol(X2); # one-hot encoded features
eAvgOld = sum(oldE) / nrow(oldX); # average error
- eAvgNew = sum(newE) / nrow(newX);
+ eAvgNew = sum(addedE) / nrow(addedX);
eAvg = sum(totalE) / m; # average error
- t2 = time();
- [S, R, selCols] = createAndScoreBasicSlices(X2, addedX2, prevTK2, totalE,
eAvg, eAvgOld, eAvgNew, minSup, alpha, verbose);
- print("IncSliceLine: Time taken for basic slices: "+(time()-t2));
+ # compute score for lowest scoring prevTK slice to set high min score early
on to prune slices based on scores
+ minsc = -Inf;
+ if( nrow(prevTK2) > 0 ) {
+ [minsc] = computeLowestPrevTK (prevTK2, X2, totalE, eAvg, alpha, minsc)
+ }
- # initialize Lattice and Statistics
- L1 = matrix(0,0,ncol(X2));
- RL = list();
- L1 = rbind(L1, S);
- RL = append(RL,R);
+ # create and score basic slices (conjunctions of 1 feature)
+ [S, R, selCols] = createAndScoreBasicSlices(X2, changedX2, prevTK2, totalE,
eAvg, eAvgOld, eAvgNew, minSup, alpha, minsc, verbose);
+
+ # initialize lattice and statistics for incremental updates
+ Stats = list();
+ L = list();
+ metaLattice = list();
+ if( encodeLat ) {
+ [L1, M] = transformSlicesToIDs(S, foffb, foffe);
+ metaLattice = append(metaLattice, M);
+ }else {
+ L1 = S;
+ }
+ L = append(L, L1);
+ Stats = append(Stats,R[, 4]);
# initialize top-k
[TK, TKC] = maintainTopK(S, R, matrix(0,0,n2), matrix(0,0,4), k, minSup);
if( verbose ) {
- [maxsc, minsc] = analyzeTopK(TKC);
- print("incSliceLine: initial top-K: count="+nrow(TK)+", max="+maxsc+",
min="+minsc+" (time="+(time()-t1)+")")
- D = rbind(D, t(as.matrix(list(1, n2, nrow(S), maxsc, minsc))));
+ [maxsc2, minsc2] = analyzeTopK(TKC);
+ print("incSliceLine: initial top-K: count="+nrow(TK)+", max="+maxsc2+",
min="+minsc2+" (time="+(time()-t1)+")")
+ D = rbind(D, t(as.matrix(list(1, n2, nrow(S), maxsc2, minsc2))));
}
- # compute score for lowest scoring prevTK slice to set high min score early
on to prune slices based on scores
- minsc = 0.0;
- if( nrow(prevTK2) > 0 ) {
- [minsc] = computeLowestPrevTK (prevTK2, X2, totalE, eAvg, alpha, minsc)
- }
-
- # reduced dataset to relevant attributes (minSup, err>0), S reduced
on-the-fly
+ # reduce dataset to relevant attributes (minSup, err>0), S reduced on-the-fly
if( selFeat ){
X2 = removeEmpty(target=X2, margin="cols", select=t(selCols));
- addedX2 = removeEmpty(target=addedX2, margin="cols", select=t(selCols));
- /*if(nrow(prevLattice2)>0) {
- prevLattice2 = removeEmpty(target=prevLattice2, margin="cols",
select=t(selCols));
- }*/
+ changedX2 = removeEmpty(target=changedX2, margin="cols",
select=t(selCols));
}
# lattice enumeration w/ size/error pruning, one iteration per level
# termination condition (max #feature levels)
maxL = ifelse(maxL<=0, n, maxL)
level = 1;
- t3 = time();
while( nrow(S) > 0 & sum(S) > 0 & level < n & level < maxL ) {
level = level + 1;
# enumerate candidate join pairs, incl size/error pruning
nrS = nrow(S);
- [S, minsc] = getPairedCandidates(S, minsc, R, TKC, k, level, eAvg, minSup,
alpha, n2, foffb, foffe, unchangedS, unchangedR);
+ [S, minsc] = getPairedCandidates(S, minsc, R, TKC, level, eAvg, minSup,
alpha, n2, foffb, foffe);
S2 = S;
- # update lattice
- L1 = rbind(L1, S);
+ # prepare and store output lattice for next run
+ if ( encodeLat ) {
+ [Lrep, M] = transformSlicesToIDs(S, foffb, foffe);
+ metaLattice = append(metaLattice, M);
+ } else {
+ Lrep = S;
+ }
+ L = append(L, Lrep);
+
+ # load one hot encoded previous lattice for the current level
+ prevLattice2 = preparePrevLattice(prevLattice, metaPrevLattice, prevFoffb,
+ prevFoffe, foffb, foffe, level, encodeLat, differentOffsets)
if(selFeat){
+ if(length(prevLattice2)>0) {
+ prevLattice2 = removeEmpty(target=prevLattice2, margin="cols",
select=t(selCols));
+ }
S2 = removeEmpty(target=S, margin="cols", select=t(selCols));
}
if(verbose) {
- print("\nincSliceLine: level "+level+":")
+ print("\nincSliceLine: level "+level+":")
+ }
+
+ # prune unchanged slices with slice size < minSup
+ if(level <= length(prevStats)){
+ [S, S2] = pruneUnchangedSlices(S, S2, prevLattice2, prevStats,
changedX2, minSup, verbose, level);
+ }
+
+ if(verbose) {
print(" -- generated paired slice candidates: "+nrS+" -> "+nrow(S));
}
@@ -216,17 +267,16 @@ m_incSliceLine = function(
beg = (i-1)*tpBlksz + 1;
end = min(i*tpBlksz, nrow(R));
R[beg:end,] = evalSlice(X2, totalE, eAvg, t(S2[beg:end,]), level,
alpha);
-
}
# update output statistics
- RL = append(RL,R);
+ Stats = append(Stats,R[, 4]);
}
else { # data-parallel
R = evalSlice(X2, totalE, eAvg, t(S2), level, alpha);
# update output statistics
- RL = append(RL,R);
+ Stats = append(Stats,R[, 4]);
}
# maintain top-k after evaluation
@@ -242,14 +292,11 @@ m_incSliceLine = function(
}
}
}
- print("IncSliceLine: Time taken for lattice enumeration: "+(time()-t3));
TK = decodeOneHot(TK, foffb, foffe);
# prepare output feature matrix for next run
- Xout = newX;
-
- L = decodeOneHot(L1, foffb, foffe);
+ Xout = totalX;
if( verbose ) {
print("incSliceLine: terminated at level "+level+":\n"
@@ -257,15 +304,14 @@ m_incSliceLine = function(
}
/*
print("Lattice: \n "+ toString(L) +":\n"
- + "Statistics: \n "+ toString(RL));
+ + "Statistics: \n "+ toString(Stats));
*/
- print("Time taken: "+(time()-t1));
}
}
-createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double] addedX2,
+createAndScoreBasicSlices = function(Matrix[Double] X2, Matrix[Double]
changedX2,
Matrix[Double] prevTK2, Matrix[Double] e,
- Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha,
Boolean verbose)
+ Double eAvg, Double eAvgOld, Double eAvgNew, Double minSup, Double alpha,
Double minsc, Boolean verbose)
return(Matrix[Double] S, Matrix[Double] R, Matrix[Double] selCols)
{
n2 = ncol(X2);
@@ -273,7 +319,7 @@ createAndScoreBasicSlices = function(Matrix[Double] X2,
Matrix[Double] addedX2,
err = t(t(e) %*% X2); # total error vector
merr = t(colMaxs(X2 * e)); # maximum error vector
- # prevTK2 is oneHotEncoded with the same offsets as oldX2 and addedX2.
+ # prevTK2 is oneHotEncoded with the same offsets as oldX2 and changedX2.
# produce a vector indicating which basic slices are within the previous top
k
TKCCnts = matrix(0, 0, 0);
if ( length (prevTK2) > 0 ) {
@@ -285,16 +331,16 @@ createAndScoreBasicSlices = function(Matrix[Double] X2,
Matrix[Double] addedX2,
# thus, here we remove all basic slices that are unchanged.
# only add "& addedCCnts != 0" if the eAvg from the new tuples is smaller
than eAvg on prev. dataset.
# otherwise scores of unchanged slices could shift into top k.
- if( eAvgOld > eAvgNew & eAvgNew != 0 & nrow(TKCCnts) >0) {
- # addedX2 is oneHotEncoded with the same offsets as oldX2 and newX2.
Thus unchanged basic slices will have a colSum of 0.
- # compute vector of colSums for addedX2 indicating which slices are
unchanged (0 value)
- addedCCnts = t(colSums(addedX2));
+ if( eAvgOld > eAvg & eAvgNew != 0 & nrow(TKCCnts) >0) {
+ # changedX2 is oneHotEncoded with the same offsets as oldX2 and totalX2.
Thus unchanged basic slices will have a colSum of 0.
+ # compute vector of colSums for changedX2 indicating which slices are
unchanged (0 value)
+ addedCCnts = t(colSums(changedX2));
addedOrTK = (addedCCnts > 0) | (TKCCnts > 0);
if( verbose ) {
drop = as.integer(sum(cCnts < minSup | err == 0 | addedOrTK == 0));
drop2 = as.integer(sum(cCnts < minSup | err == 0 ));
print("incSliceLine: dropping "+drop+"/"+n2+" features. " +drop2+ " were
below minSup = "+minSup+"
- and "+ (drop - drop2) + " were unchanged and not in the prevTK while
eAvgOld > eAvgNew. ");
+ and "+ (drop - drop2) + " were unchanged and not in the prevTK while
eAvgOld > eAvg. ");
}
selCols = (cCnts >= minSup & err > 0 & addedOrTK != 0);
@@ -306,8 +352,6 @@ createAndScoreBasicSlices = function(Matrix[Double] X2,
Matrix[Double] addedX2,
selCols = (cCnts >= minSup & err > 0 );
}
-
-
attr = removeEmpty(target=seq(1,n2), margin="rows", select=selCols);
ss = removeEmpty(target=cCnts, margin="rows", select=selCols);
se = removeEmpty(target=err, margin="rows", select=selCols);
@@ -317,6 +361,19 @@ createAndScoreBasicSlices = function(Matrix[Double] X2,
Matrix[Double] addedX2,
# score 1-slices and create initial top-k
sc = score(ss, se, eAvg, alpha, nrow(X2));
R = cbind(sc, se, sm, ss);
+
+ # score pruning of basic slices based on smallest score of all slices in
prevTK
+ if(minsc > -Inf) {
+ # compute upper bound scores for all basic slices
+ ubSc = scoreUB(ss, se, sm, eAvg, minSup, alpha, nrow(X2));
+ fScores = (ubSc >= minsc);
+ S = removeEmpty(target=S, margin="rows", select=fScores);
+ R = removeEmpty(target=R, margin="rows", select=fScores);
+ if(verbose){
+ print("incSliceLine: initial pruning based on minsc:
"+sum(fScores)+"/"+nrow(sc)+" slices remain.");
+ }
+ }
+
}
score = function(Matrix[Double] ss, Matrix[Double] se, Double eAvg, Double
alpha, Integer n)
@@ -376,11 +433,9 @@ analyzeTopK = function(Matrix[Double] TKC) return(Double
maxsc, Double minsc) {
}
getPairedCandidates = function(Matrix[Double] S, Double minsc,
- Matrix[Double] R,
- Matrix[Double] TKC, Integer k, Integer level,
+ Matrix[Double] R, Matrix[Double] TKC, Integer level,
Double eAvg, Integer minSup, Double alpha, Integer n2,
- Matrix[Double] foffb, Matrix[Double] foffe,
- list[unknown] unchangedS, list[unknown] unchangedR)
+ Matrix[Double] foffb, Matrix[Double] foffe)
return(Matrix[Double] P, Double minsc)
{
# prune invalid slices (possible without affecting overall
@@ -406,28 +461,6 @@ getPairedCandidates = function(Matrix[Double] S, Double
minsc,
P2 = table(seq(1,nrow(cix)), cix, nrow(rix), nrow(S));
P12 = P1 + P2; # combined slice
P = (P1 %*% S + P2 %*% S) != 0;
-
- # prune unchanged slices with slice size < minSup
- if (length(unchangedS) +1 >= level){
- # unchangedMat is matrix with 1 if slice is same as slice in unchangedS
(thus slice is not changed in addedX)
- # unchangedS[1] corresponds to level 2 (as level 1 is not incorporated
in unchangedS)
- unchangedMat = (P %*% t(as.matrix(unchangedS[level-1]))) == level;
- levStats = as.matrix(unchangedR[level-1]);
- levSs = levStats[, 4];
- unchangedAndBelowMinSupI = matrix(0, nrow(P), 1);
- for( i in 1:ncol(unchangedMat)){
- # by multiplying the columns of the unchanged mat with the sizes
- # from the previous lattice we get vectors indicating the sizes
- # of each unchanged slice (and 0 if it was changed)
- unchangedSizes = (unchangedMat[, i] * levSs[i])
- unchangedAndBelowMinSup = unchangedSizes < minSup & unchangedSizes > 0;
- unchangedAndBelowMinSupI = unchangedAndBelowMinSupI |
unchangedAndBelowMinSup;
- }
- P = removeEmpty(target=P, margin="rows", select=unchangedAndBelowMinSupI
== 0);
- P12 = removeEmpty(target=P12, margin="rows",
select=unchangedAndBelowMinSupI == 0);
- P1 = removeEmpty(target=P1, margin="rows",
select=unchangedAndBelowMinSupI == 0);
- P2 = removeEmpty(target=P2, margin="rows",
select=unchangedAndBelowMinSupI == 0);
- }
se = min(P1 %*% R[,2], P2 %*% R[,2])
sm = min(P1 %*% R[,3], P2 %*% R[,3])
@@ -447,23 +480,8 @@ getPairedCandidates = function(Matrix[Double] S, Double
minsc,
sm = removeEmpty(target=sm, margin="rows", select=I);
# prepare IDs for deduplication and pruning
- ID = matrix(0, nrow(P), 1);
- dom = foffe-foffb+1;
- for( j in 1:ncol(dom) ) {
- beg = as.scalar(foffb[1,j])+1;
- end = as.scalar(foffe[1,j]);
- I = rowIndexMax(P[,beg:end]) * rowMaxs(P[,beg:end]);
- prod = 1;
- if(j<ncol(dom)) {
- prod = prod(dom[1,(j+1):ncol(dom)])
- }
- ID = ID + I * prod;
- }
-
- # ID transformation to avoid exceeding INT_MAX and
- # and to void creating huge sparse intermediates
- [ID, M] = transformencode(target=as.frame(ID),
spec="{ids:true,recode:[1]}")
-
+ [ID, M] = transformSlicesToIDs(P, foffb, foffe);
+
# size pruning, with rowMin-rowMax transform
# to avoid densification (ignored zeros)
map = table(ID, seq(1,nrow(P)), max(ID), nrow(P))
@@ -492,12 +510,13 @@ getPairedCandidates = function(Matrix[Double] S, Double
minsc,
fParents = (numParents == level);
# apply all pruning
- fall = (fSizes & fScores & fParents);
+ fall = (fSizes & fScores & fParents );
# deduplication of join outputs
Dedup = removeEmpty(target=map, margin="rows", select=fall) != 0
#P = (Dedup %*% P) != 0, replaced by below (easier sparsity propagation)
DeI = table(rowIndexMax(Dedup), 1, nrow(P), 1);
+
P = removeEmpty(target=P, margin="rows", select=DeI);
}
}
@@ -534,7 +553,7 @@ decodeOneHot = function(Matrix[Double] M, Matrix[Double]
foffb, Matrix[Double] f
M = R;
}
-# function to oneHotEncode but with predefined feature offsets, to have the
same encoding for different datasets
+# function to oneHotEncode but with predefined feature offsets, to facilitate
the same encoding for different datasets
oneHotEncodeUsingOffsets = function(Matrix[Double] A, Matrix[Double] foffb,
Matrix[Double] foffe)
return(Matrix[Double] A_encoded)
{
@@ -561,28 +580,34 @@ oneHotEncodeUsingOffsets = function(Matrix[Double] A,
Matrix[Double] foffb, Matr
# throws an error if no params are provided for incremental updates.
# in case only individual parameters are entered they will be overwritten to
ensure consistency
throwNoParamsError = function()
- return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D,
Matrix[Double] L,
- list[unknown] RL, Matrix[Double] Xout, Matrix[Double] eOut, list[unknown]
params) {
- print("incSliceLine: Error: prevLattice provided but no params for
incremental update.
- Output params list from previous run is needed as input to ensure same
paramters are used for incremental update.
- Individual params inputs will be overwritten to ensure consistency.");
+ return(Matrix[Double] TK, Matrix[Double] TKC, Matrix[Double] D,
list[unknown] L,
+ list[unknown] Stats, Matrix[Double] Xout, Matrix[Double] eOut,
Matrix[Double] foffb,
+ Matrix[Double] foffe, list[unknown] metaLattice, list[unknown] params) {
+ print("incSliceLine: wrong or no parameters provided for incremental update.
Please provide all parameters.");
+ print(" -- necessary parameters: params, prevLattice, prevStats, prevFoffb,
prevFoffe, prevTK, prevTKC, oldX, oldE. ");
+ print(" -- see documentation for more details.");
TK = matrix(0,0,0);
TKC = matrix(0,0,0);
D = matrix(0,0,0);
- L = matrix(0,0,0);
- RL = list();
+ L = list();
+ Stats = list();
Xout = matrix(0,0,0);
eOut = matrix(0,0,0);
+ foffb = matrix(0,0,0);
+ foffe = matrix(0,0,0);
+ metaLattice = list();
params = list();
}
# store parameters for next run and overwrite params if provided
-storeParams = function(Integer k, Integer maxL, Integer minSup, Double alpha,
Boolean tpEval, Integer tpBlksz, Boolean selFeat, list[unknown] params)
- return(list[unknown] params, Integer k, Integer maxL, Integer minSup, Double
alpha, Boolean tpEval, Integer tpBlksz, Boolean selFeat)
+storeParams = function(Integer k, Integer maxL, Integer minSup, Double alpha,
Boolean tpEval,
+ Integer tpBlksz, Boolean selFeat, Boolean encodeLat, list[unknown] params)
+ return(list[unknown] params, Integer k, Integer maxL, Integer minSup,
+ Double alpha, Boolean tpEval, Integer tpBlksz, Boolean selFeat, Boolean
encodeLat)
{
if(length(params) == 0) {
params = list(as.double(k), as.double(maxL), as.double(minSup),
- alpha, as.double(tpEval), as.double(tpBlksz), as.double(selFeat)) ;
+ alpha, as.double(tpEval), as.double(tpBlksz), as.double(selFeat),
as.double(encodeLat));
} else {
k = as.scalar(params[1]);
maxL = as.scalar(params[2]);
@@ -591,30 +616,26 @@ storeParams = function(Integer k, Integer maxL, Integer
minSup, Double alpha, Bo
tpEval = as.boolean(as.scalar(params[5]));
tpBlksz = as.scalar(params[6]);
selFeat = as.boolean(as.scalar(params[7]));
+ encodeLat = as.boolean(as.scalar(params[8]));
}
}
-determineUnchangedSlices = function(list[unknown] prevRL, Matrix[Double]
prevLattice2, Matrix[Double] addedX2, list[unknown] levelIndices, list[unknown]
unchangedS, list[unknown] unchangedR)
- return(list[unknown] unchangedS, list[unknown] unchangedR)
+determineUnchangedSlices = function(Matrix[Double] prevStatsAtLevel,
Matrix[Double] prevLatAtLevel, Matrix[Double] changedX2, Integer level)
+ return(Matrix[Double] unchangedS, Matrix[Double] unchangedR)
{
# only computing unchanged slices for levels 2 and above,
- # as for level 1 it is done more efficiently in createAndScoreBasicSlices
- for( level in 2:length(prevRL)) {
- prevStatsAtLevel = as.matrix(prevRL[level]);
- prevLatAtLevel = prevLattice2[as.scalar(levelIndices[level]) :
as.scalar(levelIndices[level+1]) - 1,];
- # Imat has a 1 where a slice in addedX2 belongs to a slice in
prevLatAtLevel
- Imat = (addedX2 %*% t(prevLatAtLevel) == level);
- unchangedSlicesI = colSums(Imat) == 0;
- unchangedSlices = removeEmpty(target=prevLatAtLevel, margin="rows",
select=unchangedSlicesI);
- unchangedStats = removeEmpty(target=prevStatsAtLevel, margin="rows",
select=unchangedSlicesI);
- unchangedS = append(unchangedS, unchangedSlices);
- unchangedR = append(unchangedR, unchangedStats);
- }
+ # Imat has a 1 where a slice in changedX2 belongs to a slice in
prevLatAtLevel
+ Imat = (changedX2 %*% t(prevLatAtLevel) == level);
+ unchangedSlicesI = colSums(Imat) == 0;
+ unchangedS = removeEmpty(target=prevLatAtLevel, margin="rows",
select=unchangedSlicesI);
+ unchangedR = removeEmpty(target=prevStatsAtLevel, margin="rows",
select=unchangedSlicesI);
}
computeLowestPrevTK = function(Matrix[Double] prevTK2, Matrix[Double]
X2,Matrix[Double] totalE, Double eAvg, Double alpha, Double minsc)
return(Double minsc)
{
+
+ minsc = Inf;
for(i in 1: nrow(prevTK2)){
# extract and evaluate candidate slices
curSlice = prevTK2[i,];
@@ -634,3 +655,134 @@ computeLowestPrevTK = function(Matrix[Double] prevTK2,
Matrix[Double] X2,Matrix[
}
}
}
+
+pruneUnchangedSlices = function(Matrix[Double] S, Matrix[Double] S2,
Matrix[Double] prevLattice2, list[unknown] prevStats, Matrix[Double] changedX2,
Int minSup, Boolean verbose, Integer level)
+ return(Matrix[Double] S, Matrix[Double] S2)
+{
+ unchangedS = matrix(0,0,ncol(prevLattice2));
+ unchangedR = matrix(0,0,4);
+ prevStatsAtLevel = as.matrix(prevStats[level])
+ prevLatAtLevel = prevLattice2;
+ [unchangedS, unchangedR] = determineUnchangedSlices( prevStatsAtLevel,
prevLatAtLevel, changedX2, level);
+
+ if (nrow(unchangedS) > 0) {
+ # unchangedMat is matrix with 1 if slice is same as slice in unchangedS
(thus slice is not changed in addedX)
+ unchangedMat = (S2 %*% t(unchangedS)) == level;
+ levStats = unchangedR;
+ levSs = levStats[, 1];
+ unchangedAndBelowMinSupI = matrix(0, nrow(S2), 1);
+ if(nrow(unchangedMat) > 0 & ncol(unchangedMat) > 0 & nrow(levSs) > 0){
+ for( i in 1:ncol(unchangedMat)){
+ # by multiplying the columns of the unchanged mat with the sizes
+ # from the previous lattice we get vectors indicating the sizes
+ # of each unchanged slice (and 0 if it was changed)
+ unchangedSizes = (unchangedMat[, i] * levSs[i])
+ unchangedAndBelowMinSup = unchangedSizes < minSup & unchangedSizes > 0;
+ unchangedAndBelowMinSupI = unchangedAndBelowMinSupI |
unchangedAndBelowMinSup;
+ }
+ }
+
+ if(sum(unchangedAndBelowMinSupI) > 0){
+ S2 = removeEmpty(target=S2, margin="rows",
select=unchangedAndBelowMinSupI == 0);
+ S = removeEmpty(target=S, margin="rows", select=unchangedAndBelowMinSupI
== 0);
+ if(verbose) {
+ print(" -- Pruning " + sum(unchangedAndBelowMinSupI) +" slices that
are unchanged and below min sup.");
+ }
+ }
+ }
+}
+
+transformSlicesToIDs = function(Matrix[Double] S, Matrix[Double] foffb,
Matrix[Double] foffe)
+ return(Matrix[Double] ID, frame[unknown] M)
+{
+ if(nrow(S) > 0){
+ ID = matrix(0, nrow(S), 1);
+ dom = foffe-foffb+1;
+
+ for( j in 1:ncol(dom) ) {
+ beg = as.scalar(foffb[1,j])+1;
+ end = as.scalar(foffe[1,j]);
+ I = rowIndexMax(S[,beg:end]) * rowMaxs(S[,beg:end]);
+ prod = 1;
+ if(j<ncol(dom)) {
+ prod = prod(dom[1,(j+1):ncol(dom)])
+ }
+ ID = ID + I * prod;
+ }
+ # ID transformation to avoid exceeding INT_MAX and
+ # and to void creating huge sparse intermediates
+ [ID, M] = transformencode(target=as.frame(ID),
spec="{ids:true,recode:[1]}")
+ } else {
+ ID = matrix(0, 0, 1);
+ M = as.frame(matrix(0, 0, 0));
+ }
+}
+
+# Function to decode IDs back into slices using domain size scaling reversal
+transformIDsToSlices = function(Matrix[Double] ID, Matrix[Double] foffb,
Matrix[Double] foffe, frame[unknown] M)
+ return(Matrix[Double] S)
+{
+ if(nrow(ID) > 0){
+ ID = transformdecode(target=ID, spec="{ids:true,recode:[1]}", meta=M);
+ ID = as.matrix(ID);
+ dom = foffe-foffb+1;
+ numSlices = nrow(ID);
+ numFeatures = ncol(foffb);
+ S = matrix(0, numSlices, numFeatures);
+
+ for (i in 1:numSlices) {
+ remainingID = as.scalar(ID[i, 1]);
+ for (j in 1:numFeatures) {
+ domSize = as.scalar(dom[1, numFeatures - j +1]);
+ value = remainingID %% domSize;
+ S[i, numFeatures - j +1] = value;
+ remainingID = floor(remainingID/domSize);
+ }
+ }
+ } else {
+ S = matrix(0, 0, 0);
+ }
+}
+
+preparePrevLattice = function(list[unknown] prevLattice, list[unknown]
metaPrevLattice,
+ Matrix[Double] prevFoffb, Matrix[Double] prevFoffe, Matrix[Double] foffb,
+ Matrix[Double] foffe, Integer level, Boolean encodeLat, Boolean
differentOffsets)
+ return (Matrix[Double] prevLattice2) {
+
+ prevLattice2 = matrix(0,0,0);
+ if( length(prevLattice) >= level ) {
+ prevLattice2 = as.matrix(prevLattice[level]);
+ if(nrow(prevLattice2) > 0){
+ if(encodeLat) {
+ metaAtLevel = as.frame(metaPrevLattice[level]);
+ prevLatticeDec = transformIDsToSlices(prevLattice2, prevFoffb,
prevFoffe, metaAtLevel);
+ prevLattice2 = oneHotEncodeUsingOffsets(prevLatticeDec, foffb,
foffe);
+ # in case the offsets in the previous run were different the lattice
needs to be adjusted
+ } else if(differentOffsets) {
+ prevLatticeDec = decodeOneHot(prevLattice2, prevFoffb, prevFoffe);
+ prevLattice2 = oneHotEncodeUsingOffsets(prevLatticeDec, foffb,
foffe);
+ }
+ }
+ }
+}
+
+# Function to remove rows from matrix M based on a list of indices
+removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices)
+ return (Matrix[Double] MWithoutRemovedSlices, Matrix[Double] removedTuples)
+{
+ MWithoutRemovedSlices = matrix(0, 0, ncol(M));
+ removedTuples = matrix(0, 0, ncol(M));
+ index = 1;
+ for(i in 1:nrow(indices)){
+ index2 = as.scalar(indices[i]);
+ removedTuples = rbind(removedTuples, M[index2, ]);
+ if(index == index2){
+ index = index + 1;
+ i = i + 1;
+ } else {
+ MWithoutRemovedSlices = rbind(MWithoutRemovedSlices,
M[index:(index2-1),]);
+ index = index2+1;
+ }
+ }
+ MWithoutRemovedSlices = rbind(MWithoutRemovedSlices, M[index:nrow(M),]);
+}
diff --git
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
index ac59280b5a..a115767fe3 100644
---
a/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/builtin/part2/BuiltinIncSliceLineTest.java
@@ -149,362 +149,498 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
@Test
public void testTop4HybridDPFullFewAdded() {
- runIncSliceLineTest(4, "e", true, false,2, false, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", true, false,2, 1, false, false,
true,ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDPFullFewAdded() {
- runIncSliceLineTest(4, "e", true, false,2, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", true, false,2, 1, false, false,
true,ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTPFullFewAdded() {
- runIncSliceLineTest(4, "e", false, false, 2, false, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", false, false, 2,1, false, false, false,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4HybridDPFullFewAddedRemoved() {
+ runIncSliceLineTest(4, "e", true, false,2, 90, false, true,
true,ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPFullFewAddedRemoved() {
+ runIncSliceLineTest(4, "e", true, false,2, 1, false, true,
true,ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPFullFewAddedRemoved() {
+ runIncSliceLineTest(4, "e", false, false, 2,1, false, true, false,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTPFullFewAdded() {
- runIncSliceLineTest(4, "e", false, false,2, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", false, false,2, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridDPFullFewAdded() {
- runIncSliceLineTest(10, "e", true, false,2, false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", true, false,2, 1, false, false, false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDPFullFewAdded() {
- runIncSliceLineTest(10, "e", true, false,2, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", true, false,2, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPFullFewAdded() {
- runIncSliceLineTest(10, "e", false, false,2, false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", false, false,2, 1, false, false,
false,ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPFullFewAdded() {
- runIncSliceLineTest(10, "e", false, false,2, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, false,2, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridDPSelFullFewAdded() {
- runIncSliceLineTest(4, "e", true, true,2, false, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", true, true,2, 1, false, false, false,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDPSelFullFewAdded() {
- runIncSliceLineTest(4, "e", true, true,2, false, ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", true, true,2, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTPSelFullFewAdded() {
- runIncSliceLineTest(4, "e", false, true,2, false, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", false, true,2, 1, false, false, false,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4HybridDPSelFullFewAddedRemoved() {
+ runIncSliceLineTest(4, "e", true, true,2, 1, false, true, false,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPSelFullFewAddedRemoved() {
+ runIncSliceLineTest(4, "e", true, true,2, 1, false, true, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPSelFullFewAddedRemoved() {
+ runIncSliceLineTest(4, "e", false, true,2, 1, false, true, false,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTPSelFullFewAdded() {
- runIncSliceLineTest(4, "e", false, true,4, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", false, true,4, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridDPSelFullFewAdded() {
- runIncSliceLineTest(10, "e", true, true, 2, false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", true, true, 2, 1, false, false, false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDPSelFullFewAdded() {
- runIncSliceLineTest(10, "e", true, true, 1, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", true, true, 1, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPSelFullFewAdded() {
- runIncSliceLineTest(10, "e", false, true, 2, false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", false, true, 2, 1, false, false, false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPSelFullFewAdded() {
- runIncSliceLineTest(10, "e", false, true, 2, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, true, 2, 1, false, false, true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPSelE2FullFewAdded() {
- runIncSliceLineTest(10, "oe", false, true, 2, false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "oe", false, true, 2, 1, false, false, true,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPSelE2FullFewAdded() {
- runIncSliceLineTest(10, "oe", false, true, 2, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "oe", false, true, 2, 1, false, false, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelFullFewAddedRemoved() {
+ runIncSliceLineTest(10, "e", false, true, 2, 1, false, true, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelE2FullFewAddedRemoved() {
+ runIncSliceLineTest(10, "oe", false, true, 2, 1, false, true, true,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelE2FullFewAddedRemoved() {
+ runIncSliceLineTest(10, "oe", false, true, 2, 1, false, true, true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridDPFullManyAdded() {
- runIncSliceLineTest(4, "e", true, false,50, false, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", true, false,50, 1, false, false, true,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDPFullManyAdded() {
- runIncSliceLineTest(4, "e", true, false,50, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", true, false,50, 1, false, false, true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTPFullManyAdded() {
- runIncSliceLineTest(4, "e", false, false, 50, false, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", false, false, 50,1, false, false, true,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4HybridDPFullManyAddedRemoved() {
+ runIncSliceLineTest(4, "e", true, false,50, 1, false, true, true,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPFullManyAddedRemoved() {
+ runIncSliceLineTest(4, "e", true, false,50, 1, false, true, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPFullManyAddedRemoved() {
+ runIncSliceLineTest(4, "e", false, false, 50,1, false, true, true,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTPFullManyAdded() {
- runIncSliceLineTest(4, "e", false, false,60, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", false, false,60, 1, false, false, true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridDPFullManyAdded() {
- runIncSliceLineTest(10, "e", true, false,50, false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", true, false,50, 1, false, false, false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDPFullManyAdded() {
- runIncSliceLineTest(10, "e", true, false,50, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", true, false,50, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPFullManyAdded() {
- runIncSliceLineTest(10, "e", false, false,90 , false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", false, false,90 , 1, false, false, false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPFullManyAdded() {
- runIncSliceLineTest(10, "e", false, false,99 , false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, false,99 , 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridDPSelFullManyAdded() {
- runIncSliceLineTest(4, "e", true, true,50, false, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", true, true,50, 1, false, false, false,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10HybridTPFullManyAddedRemoved() {
+ runIncSliceLineTest(10, "e", false, false,90 , 1, false, true, false,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPFullManyAddedRemoved() {
+ runIncSliceLineTest(10, "e", false, false,99 , 1, false, true, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridDPSelFullManyAddedRemoved() {
+ runIncSliceLineTest(4, "e", true, true,50, 1, false, true, false,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDPSelFullManyAdded() {
- runIncSliceLineTest(4, "e", true, true,50, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", true, true,50, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTPSelFullManyAdded() {
- runIncSliceLineTest(4, "e", false, true,50, false, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", false, true,50, 1, false, false, true,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTPSelFullManyAdded() {
- runIncSliceLineTest(4, "e", false, true,50, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", false, true,50, 1, false, false, true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridDPSelFullManyAdded() {
- runIncSliceLineTest(10, "e", true, true, 50, false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", true, true, 50, 1, false, false, false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDPSelFullManyAdded() {
- runIncSliceLineTest(10, "e", true, true, 50, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", true, true, 50, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPSelFullManyAdded() {
- runIncSliceLineTest(10, "e", false, true, 50, false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", false, true, 50, 1, false, false, false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPSelFullManyAdded() {
- runIncSliceLineTest(10, "e", false, true, 50, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, true, 50, 1, false, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPSelE2FullManyAdded() {
- runIncSliceLineTest(10, "oe", false, true, 50, false, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "oe", false, true, 50, 1, false, false,
false, ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10HybridTPSelFullManyAddedRemoved() {
+ runIncSliceLineTest(10, "e", false, true, 50, 1, false, true, false,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelFullManyAddedRemoved() {
+ runIncSliceLineTest(10, "e", false, true, 50, 1, false, true, false,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelE2FullManyAddedRemoved() {
+ runIncSliceLineTest(10, "oe", false, true, 50, 99, false, true,
false, ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPSelE2FullManyAdded() {
- runIncSliceLineTest(10, "oe", false, true, 50, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "oe", false, true, 50, 1, false, false, true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridDPFullFewAddedOnlyNull() {
- runIncSliceLineTest(4, "e", true, false,2, true, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", true, false,2, 1, true, false,true,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDPFullFewAddedOnlyNull() {
- runIncSliceLineTest(4, "e", true, false,2, true, ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", true, false,2, 1, true, false,true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTPFullFewAddedOnlyNull() {
- runIncSliceLineTest(4, "e", false, false, 2, true, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", false, false, 2,1, true, false,true,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTPFullFewAddedOnlyNull() {
- runIncSliceLineTest(4, "e", false, false,2, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", false, false,2, 1, true, false,true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4SinglenodeDPFullFewAddedOnlyNullRemoved() {
+ runIncSliceLineTest(4, "e", true, false,2, 1, true, true,true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop4HybridTPFullFewAddedOnlyNullRemoved() {
+ runIncSliceLineTest(4, "e", false, false, 2,1, true, true,true,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop4SinglenodeTPFullFewAddedOnlyNullRemoved() {
+ runIncSliceLineTest(4, "e", false, false,2, 1, true, true,true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridDPFullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "e", true, false,2, true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", true, false,2, 1, true, false,true,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDPFullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "e", true, false,2, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", true, false,2, 1, true, false,true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPFullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "e", false, false,2, true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", false, false,2, 1, true, false,true,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPFullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "e", false, false,2, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, false,2, 1, true, false,false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridDPSelFullFewAddedOnlyNull() {
- runIncSliceLineTest(4, "e", true, true,2, true, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", true, true,2, 1, true, false,false,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDPSelFullFewAddedOnlyNull() {
- runIncSliceLineTest(4, "e", true, true,2, true, ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", true, true,2, 1, true, false,false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTPSelFullFewAddedOnlyNull() {
- runIncSliceLineTest(4, "e", false, true,2, true, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", false, true,2, 1, true, false,false,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTPSelFullFewAddedOnlyNull() {
- runIncSliceLineTest(4, "e", false, true,4, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", false, true,4, 1, true, false,false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridDPSelFullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "e", true, true, 2, true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", true, true, 2, 1, true, false,false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDPSelFullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "e", true, true, 1, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", true, true, 1, 1, true, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPSelFullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "e", false, true, 2, true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", false, true, 2, 1, true, false,false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPSelFullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "e", false, true, 2, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, true, 2, 1, true, false,false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPSelE2FullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "oe", false, true, 2, true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "oe", false, true, 2, 1, true, false,false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPSelE2FullFewAddedOnlyNull() {
- runIncSliceLineTest(10, "oe", false, true, 2, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "oe", false, true, 2, 1, true, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridDPFullManyAddedOnlyNull() {
- runIncSliceLineTest(4, "e", true, false,50, true, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", true, false,50, 1, true, false,true,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDPFullManyAddedOnlyNull() {
- runIncSliceLineTest(4, "e", true, false,50, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", true, false,50, 1, true, false,true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTPFullManyAddedOnlyNull() {
- runIncSliceLineTest(4, "e", false, false, 50, true, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", false, false, 50, 1, true, false,true,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTPFullManyAddedOnlyNull() {
- runIncSliceLineTest(4, "e", false, false,60, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", false, false,60, 1, true, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridDPFullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "e", true, false,50, true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", true, false,50, 1, true, false,true,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDPFullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "e", true, false,50, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", true, false,50, 1, true, false,true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPFullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "e", false, false,90 , true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", false, false,90 , 1, true, false,true,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPFullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "e", false, false,99 , true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, false,99 , 1, true, false,true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10SinglenodeDPFullManyAddedOnlyNullRemoved() {
+ runIncSliceLineTest(10, "e", true, false,50, 1, true, true,true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPFullManyAddedOnlyNullRemoved() {
+ runIncSliceLineTest(10, "e", false, false,90 , 50, true, true,true,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPFullManyAddedOnlyNullRemoved() {
+ runIncSliceLineTest(10, "e", false, false,99 , 1, true, true,true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridDPSelFullManyAddedOnlyNull() {
- runIncSliceLineTest(4, "e", true, true,50, true, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", true, true,50, 1, true, false, true,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeDPSelFullManyAddedOnlyNull() {
- runIncSliceLineTest(4, "e", true, true,50, true, ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(4, "e", true, true,50, 1, true, false,false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop4HybridTPSelFullManyAddedOnlyNull() {
- runIncSliceLineTest(4, "e", false, true,50, true, ExecMode.HYBRID);
+ runIncSliceLineTest(4, "e", false, true,50, 1, true, false,false,
ExecMode.HYBRID);
}
@Test
public void testTop4SinglenodeTPSelFullManyAddedOnlyNull() {
- runIncSliceLineTest(4, "e", false, true,50, false,
ExecMode.SINGLE_NODE);
- }
+ runIncSliceLineTest(4, "e", false, true,50, 1, true, false,false,
ExecMode.SINGLE_NODE);
+ }
@Test
public void testTop10HybridDPSelFullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "e", true, true, 50, true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", true, true, 50, 1, true, false,false,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeDPSelFullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "e", true, true, 50, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", true, true, 50, 1, true, false, false,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPSelFullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "e", false, true, 50, true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "e", false, true, 50, 1, true, false,true,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPSelFullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "e", false, true, 50, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "e", false, true, 50, 1, true, false, true,
ExecMode.SINGLE_NODE);
}
@Test
public void testTop10HybridTPSelE2FullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "oe", false, true, 50, true, ExecMode.HYBRID);
+ runIncSliceLineTest(10, "oe", false, true, 50, 1, true, false, true,
ExecMode.HYBRID);
}
@Test
public void testTop10SinglenodeTPSelE2FullManyAddedOnlyNull() {
- runIncSliceLineTest(10, "oe", false, true, 50, true,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(10, "oe", false, true, 50, 1, true, false, true,
ExecMode.SINGLE_NODE);
+ }
+
+
+ @Test
+ public void testTop10SinglenodeTPSelFullManyAddedOnlyNullRemoved() {
+ runIncSliceLineTest(10, "e", false, true, 50, 1, true, true, true,
ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testTop10HybridTPSelE2FullManyAddedOnlyNullRemoved() {
+ runIncSliceLineTest(10, "oe", false, true, 50, 20, true, true, true,
ExecMode.HYBRID);
+ }
+
+ @Test
+ public void testTop10SinglenodeTPSelE2FullManyAddedOnlyNullRemoved() {
+ runIncSliceLineTest(10, "oe", false, true, 50, 40, true, true, true,
ExecMode.SINGLE_NODE);
}
@Test
@@ -846,7 +982,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
};
- runIncSliceLineTest(newX, e, 10, "e", false, true, 50, false,
ExecMode.SINGLE_NODE);
+ runIncSliceLineTest(newX, e, 10, "e", false, true, 50, 1, false,
false, true, ExecMode.SINGLE_NODE);
}
// @Test
@@ -912,12 +1048,14 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
}
}
- private void runIncSliceLineTest(int K, String err, boolean dp, boolean
selCols, int proportionOfTuplesAddedInPercent, boolean onlyNullEAdded, ExecMode
mode) {
- runIncSliceLineTest(null, null, K, err, dp, selCols,
proportionOfTuplesAddedInPercent, onlyNullEAdded, mode);
+ private void runIncSliceLineTest(int K, String err, boolean dp, boolean
selCols, int proportionOfTuplesAddedInPercent, int
proportionOfTuplesRemovedInPercent, boolean onlyNullEAdded, boolean
removeTuples, boolean encodeLat, ExecMode mode) {
+
+ runIncSliceLineTest(null, null, K, err, dp, selCols,
proportionOfTuplesAddedInPercent, proportionOfTuplesRemovedInPercent,
onlyNullEAdded, removeTuples, encodeLat, mode);
+
}
- private void runIncSliceLineTest(double[][] customX, double[][]
customE,int K, String err, boolean dp, boolean selCols, int
proportionOfTuplesAddedInPercent, boolean onlyNullEAdded, ExecMode mode) {
+ private void runIncSliceLineTest(double[][] customX, double[][]
customE,int K, String err, boolean dp, boolean selCols, int
proportionOfTuplesAddedInPercent, int proportionOfTuplesRemovedInPercent,
boolean onlyNullEAdded, boolean removeTuples, boolean encodeLat, ExecMode mode)
{
ExecMode platformOld = setExecMode(mode);
loadTestConfiguration(getTestConfiguration(TEST_NAME2));
@@ -944,9 +1082,24 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
e =
TestUtils.convertHashMapToDoubleArray(readDMLMatrixFromOutputDir("e"));
}
int numOfAddedTuples = (int) Math.round(newX.length *
proportionOfTuplesAddedInPercent / 100.0);
-
+ int numOfRemovedTuples = (int) Math.round((newX.length -
numOfAddedTuples) * proportionOfTuplesRemovedInPercent / 100.0);
+ if(numOfRemovedTuples == 0){
+ numOfRemovedTuples = 1;
+ }
+
double[][] addedX = new double[numOfAddedTuples][newX[0].length];
double[][] oldX = new double[newX.length -
numOfAddedTuples][newX[0].length];
+ double[][] indicesRemoved = new double[numOfRemovedTuples][1];
+
+ if(removeTuples){
+ for (int i = 0; i < numOfRemovedTuples; i++) {
+ indicesRemoved[i][0] = i +1;
+ }
+ } else {
+ for (int i = 0; i < numOfRemovedTuples; i++) {
+ indicesRemoved[i][0] = 0;
+ }
+ }
for (int i = 0; i < numOfAddedTuples; i++) {
addedX[i] = newX[i];
@@ -955,6 +1108,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
for (int i = numOfAddedTuples; i < newX.length; i++) {
oldX[i - numOfAddedTuples] = newX[i];
}
+
double[][] addedE = new double[numOfAddedTuples][e[0].length];
double[][] oldE = new double[e.length -
numOfAddedTuples][e[0].length];
if(onlyNullEAdded){
@@ -976,10 +1130,11 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
writeInputMatrixWithMTD("oldX", oldX, false);
writeInputMatrixWithMTD("oldE", oldE, false);
writeInputMatrixWithMTD("addedE", addedE, false);
+ writeInputMatrixWithMTD("indicesRemoved", indicesRemoved, false);
fullDMLScriptName = HOME + TEST_NAME2 + ".dml";
programArgs = new String[] { "-args", input("addedX"),
input("oldX"), input("oldE"), input("addedE"), String.valueOf(K),
- String.valueOf(!dp).toUpperCase(),
String.valueOf(selCols).toUpperCase(),
+ String.valueOf(!dp).toUpperCase(),
String.valueOf(selCols).toUpperCase(), String.valueOf(encodeLat).toUpperCase(),
input("indicesRemoved"),
String.valueOf(VERBOSE).toUpperCase(), output("R1"),
output("R2") };
runTest(true, false, null, -1);
@@ -991,7 +1146,50 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
TestUtils.compareMatrices(ret1, ret2, 1e-2);
+ // if removeTuples is true, then we need to remove the tuples from
the oldX and oldE
+ if(removeTuples){
+ double[][] tempOldX = new double[oldX.length -
numOfRemovedTuples][oldX[0].length];
+ double[][] tempOldE = new double[oldE.length -
numOfRemovedTuples][oldE[0].length];
+ int j = 0;
+ for (int i = 0; i < oldX.length; i++) {
+ if(j < indicesRemoved.length){
+ if(i == (int) indicesRemoved[j][0] - 1){
+ j++;
+ } else {
+ tempOldX[i-j] = oldX[i];
+ tempOldE[i-j] = oldE[i];
+ }
+ } else {
+ tempOldX[i-j] = oldX[i];
+ tempOldE[i-j] = oldE[i];
+ }
+ }
+ oldX = tempOldX;
+ oldE = tempOldE;
+
+ }
+
+ // combine oldX and addedX and write it into newX
+ newX = new double[oldX.length + addedX.length][oldX[0].length];
+ for (int i = 0; i < oldX.length; i++) {
+ newX[i] = oldX[i];
+ }
+ for (int i = 0; i < addedX.length; i++) {
+ newX[oldX.length + i] = addedX[i];
+ }
+
+ // combine oldE and addedE and write it into e
+ e = new double[oldE.length + addedE.length][oldE[0].length];
+ for (int i = 0; i < oldE.length; i++) {
+ e[i] = oldE[i];
+ }
+ for (int i = 0; i < addedE.length; i++) {
+ e[oldE.length + i] = addedE[i];
+ }
+
+
+
if(customX != null && customE != null){
newX = customX;
e = customE;
@@ -1013,7 +1211,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
// compare expected results
- if (err.equals("e") && customX == null && customE == null &&
!onlyNullEAdded) {
+ if (err.equals("e") && customX == null && customE == null &&
!onlyNullEAdded && !removeTuples) {
double[][] ret =
TestUtils.convertHashMapToDoubleArray(dmlfile1);
if (mode != ExecMode.SPARK) // TODO why only CP correct, but R
always matches? test framework?
for (int i = 0; i < K; i++)
@@ -1058,7 +1256,7 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
}
public void testIncSliceLineCustomInputsFull(double[][] addedX, double[][]
oldX, double[][] oldE, double[][] addedE, int K, double[][] correctRes) {
- boolean dp = true, selCols = false;
+ boolean dp = true, selCols = false, encodeLat = true;
ExecMode mode = ExecMode.SINGLE_NODE;
ExecMode platformOld = setExecMode(mode);
loadTestConfiguration(getTestConfiguration(TEST_NAME2));
@@ -1067,14 +1265,19 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
try {
loadTestConfiguration(getTestConfiguration(TEST_NAME2));
+ double[][] indicesRemoved = new double[1][1];
+ indicesRemoved[0][0] = 0;
+
+
writeInputMatrixWithMTD("addedX", addedX, false);
writeInputMatrixWithMTD("oldX", oldX, false);
writeInputMatrixWithMTD("oldE", oldE, false);
writeInputMatrixWithMTD("addedE", addedE, false);
+ writeInputMatrixWithMTD("indicesRemoved", indicesRemoved, false);
fullDMLScriptName = HOME + TEST_NAME2 + ".dml";
programArgs = new String[] { "-args", input("addedX"),
input("oldX"), input("oldE"), input("addedE"), String.valueOf(K),
- String.valueOf(!dp).toUpperCase(),
String.valueOf(selCols).toUpperCase(),
+ String.valueOf(!dp).toUpperCase(),
String.valueOf(selCols).toUpperCase(), String.valueOf(encodeLat).toUpperCase(),
input("indicesRemoved"),
String.valueOf(VERBOSE).toUpperCase(), output("R1"),
output("R2") };
runTest(true, false, null, -1);
@@ -1092,4 +1295,4 @@ public class BuiltinIncSliceLineTest extends
AutomatedTestBase {
}
}
-}
\ No newline at end of file
+}
diff --git a/src/test/scripts/functions/builtin/incSliceLine.dml
b/src/test/scripts/functions/builtin/incSliceLine.dml
index 1a43ab25f0..5df8b597c6 100644
--- a/src/test/scripts/functions/builtin/incSliceLine.dml
+++ b/src/test/scripts/functions/builtin/incSliceLine.dml
@@ -23,8 +23,7 @@ addedX = read($1);
e = read($2);
# call slice finding
-[TS,TR] = incSliceLine(addedX=addedX, newE=e, k=$3,
+[TS,TR] = incSliceLine(addedX=addedX, addedE=e, k=$3,
alpha=0.95, minSup=4, tpEval=$4, selFeat=$5, verbose=$6);
write(TR, $7)
-
diff --git a/src/test/scripts/functions/builtin/incSliceLineFull.dml
b/src/test/scripts/functions/builtin/incSliceLineFull.dml
index 5d107ba998..d9dbafc7e0 100644
--- a/src/test/scripts/functions/builtin/incSliceLineFull.dml
+++ b/src/test/scripts/functions/builtin/incSliceLineFull.dml
@@ -25,19 +25,53 @@ totalX = rbind(oldX, addedX);
oldE = read($3);
addedE = read($4);
totalE = rbind(oldE, addedE);
+indicesRemoved = read($9);
-# call slice finding
-[TK, TKC, D, L, RL, Xout, eOut, params] = incSliceLine(addedX=oldX, newE=oldE,
k=$5,
- alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, verbose=$8);
+if(nrow(indicesRemoved) > 0){
+ if(as.scalar(indicesRemoved[1]) == 0){
+ indicesRemoved = matrix(0, 0, 0);
+ }
+}
-[TK1, TKC1, D1, L1, RL1, Xout1, eOut1, params] = incSliceLine(addedX=addedX,
oldX = oldX, oldE = oldE, newE=addedE, prevLattice = L, prevRL = RL, prevTK =
TK, prevTKC = TKC, k=$5,
- alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, verbose=$8, params=params);
+# first compute the top k slices in two increments
+ # first increment
+[TK, TKC, D, L, meta, Stats, Xout, eOut, foffb, foffe, params] =
incSliceLine(addedX=oldX, addedE=oldE, k=$5,
+ alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, verbose=$10);
+
+ # second increment
+[TK1, TKC1, D1, L1, meta1, Stats1, Xout1, eOut1, foffb2, foffe2, params] =
incSliceLine(addedX=addedX, oldX = oldX, oldE = oldE, addedE=addedE,
prevLattice = L, metaPrevLattice=meta, prevStats = Stats, prevTK = TK, prevTKC
= TKC, k=$5,
+ alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8,
indicesRemoved=indicesRemoved, verbose=$10, params=params, prevFoffb = foffb,
prevFoffe = foffe);
-[TK2, TKC2, D2, L2, RL2, Xout2, eOut2, params] = incSliceLine(addedX=totalX,
newE=totalE, k=$5,
- alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, verbose=$8);
+# prepare totalX and totalE for running sliceline on total data
+if(nrow(indicesRemoved) > 0){
+ oldX = removeRowsByIndices(oldX, indicesRemoved);
+ oldE = removeRowsByIndices(oldE, indicesRemoved);
+ totalX = rbind(oldX, addedX);
+ totalE = rbind(oldE, addedE);
+}
+# call sliceline on total data
+[TK2, TKC2, D2, L2, meta2, Stats2, Xout2, eOut2, foffb3, foffe3, params] =
incSliceLine(addedX=totalX, addedE=totalE, k=$5,
+ alpha=0.95, minSup=4, tpEval=$6, selFeat=$7, encodeLat=$8, verbose=$10);
+write(TKC1, $11)
+write(TKC2, $12)
-write(TKC1, $9)
-write(TKC2, $10)
-
+# Function to remove rows from matrix M based on a list of indices
+removeRowsByIndices = function(Matrix[Double] M, Matrix[Double] indices)
+ return (Matrix[Double] result)
+{
+ result = matrix(0, 0, ncol(M));
+ index = 1;
+ for(i in 1:nrow(indices)){
+ index2 = as.scalar(indices[i]);
+ if(index == index2){
+ index = index + 1;
+ i = i + 1;
+ } else {
+ result = rbind(result, M[index:(index2-1),]);
+ index = index2+1;
+ }
+ }
+ result = rbind(result, M[index:nrow(M),]);
+}