This is an automated email from the ASF dual-hosted git repository.

arnabp20 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/master by this push:
     new b75cf91  [SYSTEMDS-2769] Fix lineage cache eviction test
b75cf91 is described below

commit b75cf91b9a1077fc04468417ce87d33181295c62
Author: arnabp <arnab.ph...@tugraz.at>
AuthorDate: Wed Dec 23 18:38:13 2020 +0100

    [SYSTEMDS-2769] Fix lineage cache eviction test
    
    This patch replaces the current cache eviction test script
    with a better and (hopefully) more robust one. The new script
    simulates a mini-batch scenario with batch-wise preprocessing,
    which can be reused per epoch.
---
 .../test/functions/lineage/CacheEvictionTest.java  | 52 +++++++-----------
 .../scripts/functions/lineage/CacheEviction2.dml   | 61 ++++++++++++++++++++++
 2 files changed, 81 insertions(+), 32 deletions(-)

diff --git 
a/src/test/java/org/apache/sysds/test/functions/lineage/CacheEvictionTest.java 
b/src/test/java/org/apache/sysds/test/functions/lineage/CacheEvictionTest.java
index c2b79d3..fa9bae6 100644
--- 
a/src/test/java/org/apache/sysds/test/functions/lineage/CacheEvictionTest.java
+++ 
b/src/test/java/org/apache/sysds/test/functions/lineage/CacheEvictionTest.java
@@ -29,7 +29,6 @@ import org.apache.sysds.hops.recompile.Recompiler;
 import org.apache.sysds.runtime.lineage.Lineage;
 import org.apache.sysds.runtime.lineage.LineageCacheConfig;
 import org.apache.sysds.runtime.lineage.LineageCacheConfig.ReuseCacheType;
-import org.apache.sysds.runtime.lineage.LineageCacheEviction;
 import org.apache.sysds.runtime.lineage.LineageCacheStatistics;
 import org.apache.sysds.runtime.matrix.data.MatrixValue;
 import org.apache.sysds.test.TestConfiguration;
@@ -42,7 +41,7 @@ import org.junit.Test;
 public class CacheEvictionTest extends LineageBase {
 
        protected static final String TEST_DIR = "functions/lineage/";
-       protected static final String TEST_NAME1 = "CacheEviction1";
+       protected static final String TEST_NAME1 = "CacheEviction2";
 
        protected String TEST_CLASS_DIR = TEST_DIR + 
CacheEvictionTest.class.getSimpleName() + "/";
        
@@ -65,17 +64,16 @@ public class CacheEvictionTest extends LineageBase {
                        LOG.debug("------------ BEGIN " + testname + 
"------------");
                        
                        /* This test verifies the order of evicted items w.r.t. 
the specified
-                        * cache policies. This test enables individual 
components of the 
-                        * scoring function by masking the other components, 
and compare the
-                        * order of evicted entries for different policies. 
HYBRID policy is 
-                        * not considered for this test as it is hard to 
anticipate the reuse
-                        * statistics if all the components are unmasked. 
+                        * cache policies, using a mini-batch wise autoencoder 
inspired
+                        * test script. An epoch-wise reusable scale and shift 
is part of
+                        * every batch processing. LRU fails to reuse the scale 
calls as
+                        * it tends to evict scale and shift intermediates due 
to the higher
+                        * number of post-scale intermediates, whereas cost & 
size successfully
+                        * reuses all the reusable operations.
                         * 
-                        * TODO: Test disk spilling, which will need some 
tunings in eviction
-                        * logic; otherwise the automated test might take 
significantly 
-                        * longer as eviction logic tends to just delete 
entries with little
-                        * computation and estimated I/O time. Note that disk 
spilling is 
-                        * already happening as part of other tests (e.g. 
MultiLogReg).
+                        * TODO: add DagHeight. All three policies perform as 
expected on my
+                        * laptop, but for some reason, LRU performs better in 
GitHub Actions
+                        * - that leads to a failed comparison between DagHeight 
and LRU.
                         */
                        
                        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = false;
@@ -84,8 +82,7 @@ public class CacheEvictionTest extends LineageBase {
                        getAndLoadTestConfiguration(testname);
                        fullDMLScriptName = getScript();
                        Lineage.resetInternalState();
-                       long cacheSize = LineageCacheEviction.getCacheLimit();
-                       LineageCacheConfig.setReusableOpcodes("exp", "+", 
"round");
+                       LineageCacheConfig.setSpill(false); //disable spilling
                        
                        // LRU based eviction
                        List<String> proArgs = new ArrayList<>();
@@ -94,14 +91,12 @@ public class CacheEvictionTest extends LineageBase {
                        
proArgs.add(ReuseCacheType.REUSE_FULL.name().toLowerCase());
                        proArgs.add("policy_lru");
                        proArgs.add("-args");
-                       proArgs.add(String.valueOf(cacheSize));
                        proArgs.add(output("R"));
                        programArgs = proArgs.toArray(new 
String[proArgs.size()]);
                        runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
                        HashMap<MatrixValue.CellIndex, Double> R_lru = 
readDMLMatrixFromOutputDir("R");
-                       long expCount_lru = 
Statistics.getCPHeavyHitterCount("exp");
                        long hitCount_lru = 
LineageCacheStatistics.getInstHits();
-                       long evictedCount_lru = 
LineageCacheStatistics.getMemDeletes();
+                       long colmeanCount_lru = 
Statistics.getCPHeavyHitterCount("uacmean");
                        
                        // costnsize scheme (computationTime/Size)
                        proArgs.clear();
@@ -110,35 +105,28 @@ public class CacheEvictionTest extends LineageBase {
                        
proArgs.add(ReuseCacheType.REUSE_FULL.name().toLowerCase());
                        proArgs.add("policy_costnsize");
                        proArgs.add("-args");
-                       proArgs.add(String.valueOf(cacheSize));
                        proArgs.add(output("R"));
                        programArgs = proArgs.toArray(new 
String[proArgs.size()]);
                        Lineage.resetInternalState();
                        runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
                        HashMap<MatrixValue.CellIndex, Double> R_costnsize= 
readDMLMatrixFromOutputDir("R");
-                       long expCount_wt = 
Statistics.getCPHeavyHitterCount("exp");
-                       long hitCount_wt = LineageCacheStatistics.getInstHits();
-                       long evictedCount_wt = 
LineageCacheStatistics.getMemDeletes();
-                       LineageCacheConfig.resetReusableOpcodes();
+                       long hitCount_cs = LineageCacheStatistics.getInstHits();
+                       long colmeanCount_cs = 
Statistics.getCPHeavyHitterCount("uacmean");
                        
                        // Compare results
                        Lineage.setLinReuseNone();
                        TestUtils.compareMatrices(R_lru, R_costnsize, 1e-6, 
"LRU", "costnsize");
-                       
-                       // Compare reused instructions
-                       Assert.assertTrue(expCount_lru >= expCount_wt);
-                       // Compare counts of evicted items
-                       // LRU tends to evict more entries to recover equal 
amount of memory
-                       // Note: changed to equals to fix flaky tests where 
both are not evicted at all
-                       // (e.g., due to high execution time as sometimes 
observed through github actions)
-                       Assert.assertTrue(("Violated expected evictions: 
"+evictedCount_lru+" >= "+evictedCount_wt),
-                               evictedCount_lru >= evictedCount_wt);
                        // Compare cache hits
-                       Assert.assertTrue(hitCount_lru < hitCount_wt);
+                       Assert.assertTrue("Violated cache hit count: 
"+hitCount_lru+" < "+hitCount_cs, 
+                                       hitCount_lru < hitCount_cs);
+                       // Compare reused instruction (uacmean) counts
+                       Assert.assertTrue("Violated uacmean count: 
"+colmeanCount_cs+" < "+colmeanCount_lru, 
+                                       colmeanCount_cs < colmeanCount_lru);
                }
                finally {
                        OptimizerUtils.ALLOW_ALGEBRAIC_SIMPLIFICATION = 
old_simplification;
                        OptimizerUtils.ALLOW_SUM_PRODUCT_REWRITES = 
old_sum_product;
+                       LineageCacheConfig.setSpill(true);
                        Recompiler.reinitRecompiler();
                }
        }
diff --git a/src/test/scripts/functions/lineage/CacheEviction2.dml 
b/src/test/scripts/functions/lineage/CacheEviction2.dml
new file mode 100644
index 0000000..f1297b7
--- /dev/null
+++ b/src/test/scripts/functions/lineage/CacheEviction2.dml
@@ -0,0 +1,61 @@
+#-------------------------------------------------------------
+#
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements.  See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership.  The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License.  You may obtain a copy of the License at
+#
+#   http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing,
+# software distributed under the License is distributed on an
+# "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+# KIND, either express or implied.  See the License for the
+# specific language governing permissions and limitations
+# under the License.
+#
+#-------------------------------------------------------------
+
+D = rand(rows=6400, cols=784, min=0, max=20, seed=42)
+bs = 32;
+ep = 10;
+iter_ep = ceil(nrow(D)/bs);
+maxiter = ep * iter_ep;
+beg = 1;
+iter = 0;
+i = 1;
+
+while (iter < maxiter) {
+  end = beg + bs - 1;
+  if (end>nrow(D))
+    end = nrow(D);
+  X = D[beg:end,]
+
+  #reusable OP across epochs
+  X = scale(X, TRUE, TRUE);
+  #pollute cache with non-reusable OPs
+  X = ((X + X) * i - X) / (i+1)
+  X = ((X + X) * i - X) / (i+1)
+  X = ((X + X) * i - X) / (i+1)
+  X = ((X + X) * i - X) / (i+1)
+  X = ((X + X) * i - X) / (i+1)
+  X = ((X + X) * i - X) / (i+1)
+  X = ((X + X) * i - X) / (i+1)
+  X = ((X + X) * i - X) / (i+1)
+  X = ((X + X) * i - X) / (i+1)
+  X = ((X + X) * i - X) / (i+1)
+
+  iter = iter + 1;
+  if (end == nrow(D))
+    beg = 1;
+  else
+    beg = end + 1;
+  i = i + 1;
+
+}
+R = X;
+write(R, $1, format="text");
+

Reply via email to