This is an automated email from the ASF dual-hosted git repository.
arnabp20 pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/master by this push:
new 8f17903 [SYSTEMDS-2755] Lineage cache benefit estimator
8f17903 is described below
commit 8f179036aa2306e0e090a41e7c8224125d3597d2
Author: arnabp <[email protected]>
AuthorDate: Fri Dec 4 23:30:17 2020 +0100
[SYSTEMDS-2755] Lineage cache benefit estimator
This patch introduces a new option to -lineage, estimate, which
collects various statistics and prints them to stdout. Collected
stats include, but are not limited to, total saved computation time,
size of all cacheable and reused intermediates, and a list of opcodes
ordered by compute time saved. Examples and descriptions are
available in Jira.
---
src/main/java/org/apache/sysds/api/DMLOptions.java | 3 +
src/main/java/org/apache/sysds/api/DMLScript.java | 3 +
.../org/apache/sysds/api/ScriptExecutorUtils.java | 4 +
.../sysds/hops/ipa/IPAPassFlagNonDeterminism.java | 3 +-
.../sysds/runtime/controlprogram/ProgramBlock.java | 2 +-
.../instructions/cp/FunctionCallCPInstruction.java | 9 +-
.../org/apache/sysds/runtime/lineage/Lineage.java | 1 +
.../apache/sysds/runtime/lineage/LineageCache.java | 11 ++
.../sysds/runtime/lineage/LineageCacheConfig.java | 11 +-
.../sysds/runtime/lineage/LineageEstimator.java | 219 +++++++++++++++++++++
.../lineage/LineageEstimatorStatistics.java | 87 ++++++++
11 files changed, 346 insertions(+), 7 deletions(-)
diff --git a/src/main/java/org/apache/sysds/api/DMLOptions.java
b/src/main/java/org/apache/sysds/api/DMLOptions.java
index 7db0043..0d82a50 100644
--- a/src/main/java/org/apache/sysds/api/DMLOptions.java
+++ b/src/main/java/org/apache/sysds/api/DMLOptions.java
@@ -63,6 +63,7 @@ public class DMLOptions {
public boolean lineage_dedup = false; //
whether deduplicate lineage items
public ReuseCacheType linReuseType = ReuseCacheType.NONE; //
reuse type (full, partial, hybrid)
public LineageCachePolicy linCachePolicy= LineageCachePolicy.HYBRID;
// lineage cache eviction policy
+ public boolean lineage_estimate = false; //
whether estimate reuse benefits
public boolean fedWorker = false;
public int fedWorkerPort = -1;
public boolean checkPrivacy = false; // Check
which privacy constraints are loaded and checked during federated execution
@@ -137,6 +138,8 @@ public class DMLOptions {
dmlOptions.linCachePolicy = LineageCachePolicy.DAGHEIGHT;
else if
(lineageType.equalsIgnoreCase("policy_hybrid"))
dmlOptions.linCachePolicy = LineageCachePolicy.HYBRID;
+ else if
(lineageType.equalsIgnoreCase("estimate"))
+
dmlOptions.lineage_estimate = lineageType.equalsIgnoreCase("estimate");
else
throw new
org.apache.commons.cli.ParseException(
"Invalid
argument specified for -lineage option: " + lineageType);
diff --git a/src/main/java/org/apache/sysds/api/DMLScript.java
b/src/main/java/org/apache/sysds/api/DMLScript.java
index dfe6825..703ae6c 100644
--- a/src/main/java/org/apache/sysds/api/DMLScript.java
+++ b/src/main/java/org/apache/sysds/api/DMLScript.java
@@ -98,6 +98,7 @@ public class DMLScript
public static boolean LINEAGE_DEDUP =
DMLOptions.defaultOptions.lineage_dedup; // whether deduplicate lineage
items
public static ReuseCacheType LINEAGE_REUSE =
DMLOptions.defaultOptions.linReuseType; // whether lineage-based reuse
public static LineageCachePolicy LINEAGE_POLICY =
DMLOptions.defaultOptions.linCachePolicy; // lineage cache eviction policy
+ public static boolean LINEAGE_ESTIMATE =
DMLOptions.defaultOptions.lineage_estimate; // whether estimate reuse benefits
public static boolean CHECK_PRIVACY =
DMLOptions.defaultOptions.checkPrivacy; // Check which privacy constraints
are loaded and checked during federated execution
public static boolean USE_ACCELERATOR =
DMLOptions.defaultOptions.gpu;
@@ -218,6 +219,7 @@ public class DMLScript
LINEAGE_DEDUP = dmlOptions.lineage_dedup;
LINEAGE_REUSE = dmlOptions.linReuseType;
LINEAGE_POLICY = dmlOptions.linCachePolicy;
+ LINEAGE_ESTIMATE = dmlOptions.lineage_estimate;
CHECK_PRIVACY = dmlOptions.checkPrivacy;
String fnameOptConfig = dmlOptions.configFile;
@@ -250,6 +252,7 @@ public class DMLScript
LineageCacheConfig.setConfig(LINEAGE_REUSE);
LineageCacheConfig.setCachePolicy(LINEAGE_POLICY);
+ LineageCacheConfig.setEstimator(LINEAGE_ESTIMATE);
String dmlScriptStr = readDMLScript(isFile,
fileOrScript);
Map<String, String> argVals = dmlOptions.argVals;
diff --git a/src/main/java/org/apache/sysds/api/ScriptExecutorUtils.java
b/src/main/java/org/apache/sysds/api/ScriptExecutorUtils.java
index 6758717..970a800 100644
--- a/src/main/java/org/apache/sysds/api/ScriptExecutorUtils.java
+++ b/src/main/java/org/apache/sysds/api/ScriptExecutorUtils.java
@@ -34,6 +34,7 @@ import org.apache.sysds.runtime.instructions.cp.Data;
import org.apache.sysds.runtime.instructions.gpu.context.GPUContext;
import org.apache.sysds.runtime.instructions.gpu.context.GPUContextPool;
import org.apache.sysds.runtime.instructions.gpu.context.GPUObject;
+import org.apache.sysds.runtime.lineage.LineageEstimatorStatistics;
import org.apache.sysds.utils.Statistics;
public class ScriptExecutorUtils {
@@ -120,6 +121,9 @@ public class ScriptExecutorUtils {
Statistics.stopRunTimer();
System.out.println(Statistics.display(statisticsMaxHeavyHitters > 0 ?
statisticsMaxHeavyHitters :
DMLScript.STATISTICS_COUNT));
+
+ if (DMLScript.LINEAGE_ESTIMATE)
+
System.out.println(LineageEstimatorStatistics.displayLineageEstimates());
}
}
diff --git
a/src/main/java/org/apache/sysds/hops/ipa/IPAPassFlagNonDeterminism.java
b/src/main/java/org/apache/sysds/hops/ipa/IPAPassFlagNonDeterminism.java
index 6275f10..9dbe814 100644
--- a/src/main/java/org/apache/sysds/hops/ipa/IPAPassFlagNonDeterminism.java
+++ b/src/main/java/org/apache/sysds/hops/ipa/IPAPassFlagNonDeterminism.java
@@ -23,6 +23,7 @@ import java.util.ArrayList;
import java.util.Collection;
import java.util.HashSet;
+import org.apache.sysds.api.DMLScript;
import org.apache.sysds.hops.FunctionOp;
import org.apache.sysds.hops.Hop;
import org.apache.sysds.hops.HopsException;
@@ -50,7 +51,7 @@ public class IPAPassFlagNonDeterminism extends IPAPass {
@Override
public boolean rewriteProgram (DMLProgram prog, FunctionCallGraph
fgraph, FunctionCallSizeInfo fcallSizes)
{
- if (!LineageCacheConfig.isMultiLevelReuse())
+ if (!LineageCacheConfig.isMultiLevelReuse() &&
!DMLScript.LINEAGE_ESTIMATE)
return false;
try {
diff --git
a/src/main/java/org/apache/sysds/runtime/controlprogram/ProgramBlock.java
b/src/main/java/org/apache/sysds/runtime/controlprogram/ProgramBlock.java
index 8bfdcc7..263ecf4 100644
--- a/src/main/java/org/apache/sysds/runtime/controlprogram/ProgramBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/controlprogram/ProgramBlock.java
@@ -244,7 +244,7 @@ public abstract class ProgramBlock implements ParseInfo
// try to reuse instruction result from lineage cache
if( !LineageCache.reuse(tmp, ec) ) {
- long et0 = !ReuseCacheType.isNone() ?
System.nanoTime() : 0;
+ long et0 = (!ReuseCacheType.isNone() ||
DMLScript.LINEAGE_ESTIMATE) ? System.nanoTime() : 0;
// process actual instruction
tmp.processInstruction(ec);
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/cp/FunctionCallCPInstruction.java
b/src/main/java/org/apache/sysds/runtime/instructions/cp/FunctionCallCPInstruction.java
index ad24ced..f8e0e6a 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/cp/FunctionCallCPInstruction.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/cp/FunctionCallCPInstruction.java
@@ -125,7 +125,7 @@ public class FunctionCallCPInstruction extends
CPInstruction {
}
// check if function outputs can be reused from cache
- LineageItem[] liInputs = DMLScript.LINEAGE &&
LineageCacheConfig.isMultiLevelReuse() ?
+ LineageItem[] liInputs = LineageCacheConfig.isMultiLevelReuse()
|| DMLScript.LINEAGE_ESTIMATE ?
LineageItemUtils.getLineage(ec, _boundInputs) : null;
if (!fpb.isNondeterministic() && reuseFunctionOutputs(liInputs,
fpb, ec))
return; //only if all the outputs are found in cache
@@ -184,7 +184,7 @@ public class FunctionCallCPInstruction extends
CPInstruction {
fn_ec.setVariables(functionVariables);
fn_ec.setLineage(lineage);
// execute the function block
- long t0 = !ReuseCacheType.isNone() ? System.nanoTime() : 0;
+ long t0 = !ReuseCacheType.isNone()||DMLScript.LINEAGE_ESTIMATE
? System.nanoTime() : 0;
try {
fpb._functionName = this._functionName;
fpb._namespace = this._namespace;
@@ -197,7 +197,7 @@ public class FunctionCallCPInstruction extends
CPInstruction {
String fname =
DMLProgram.constructFunctionKey(_namespace, _functionName);
throw new DMLRuntimeException("error executing function
" + fname, e);
}
- long t1 = !ReuseCacheType.isNone() ? System.nanoTime() : 0;
+ long t1 = !ReuseCacheType.isNone()||DMLScript.LINEAGE_ESTIMATE
? System.nanoTime() : 0;
// cleanup all returned variables w/o binding
HashSet<String> expectRetVars = new HashSet<>();
@@ -240,7 +240,8 @@ public class FunctionCallCPInstruction extends
CPInstruction {
}
//update lineage cache with the functions outputs
- if (DMLScript.LINEAGE && LineageCacheConfig.isMultiLevelReuse()
&& !fpb.isNondeterministic()) {
+ if ((DMLScript.LINEAGE &&
LineageCacheConfig.isMultiLevelReuse() && !fpb.isNondeterministic())
+ || (LineageCacheConfig.isEstimator() &&
!fpb.isNondeterministic())) {
LineageCache.putValue(fpb.getOutputParams(), liInputs,
getCacheFunctionName(_functionName,
fpb), fn_ec, t1-t0);
//FIXME: send _boundOutputNames instead of
fpb.getOutputParams as
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
b/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
index 5a8a922..b6519bc 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/Lineage.java
@@ -170,6 +170,7 @@ public class Lineage {
LineageItem.resetIDSequence();
LineageCache.resetCache();
LineageCacheStatistics.reset();
+ LineageEstimator.resetEstimatorCache();
}
public static void setLinReusePartial() {
diff --git a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
index 1ebaa50..d6e74e7 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCache.java
@@ -154,6 +154,9 @@ public class LineageCache
public static boolean reuse(List<String> outNames, List<DataIdentifier>
outParams,
int numOutputs, LineageItem[] liInputs, String name,
ExecutionContext ec)
{
+ if (DMLScript.LINEAGE_ESTIMATE && !name.startsWith("SB"))
+ LineageEstimator.stopEstimator(outParams, liInputs,
name);
+
if( !LineageCacheConfig.isMultiLevelReuse())
return false;
@@ -253,6 +256,10 @@ public class LineageCache
}
public static void putValue(Instruction inst, ExecutionContext ec, long
starttime) {
+ if (DMLScript.LINEAGE_ESTIMATE)
+ //forward to estimator
+ LineageEstimator.processSingleInst(inst, ec, starttime);
+
if (ReuseCacheType.isNone())
return;
long computetime = System.nanoTime() - starttime;
@@ -322,6 +329,10 @@ public class LineageCache
public static void putValue(List<DataIdentifier> outputs,
LineageItem[] liInputs, String name, ExecutionContext ec, long
computetime)
{
+ if (LineageCacheConfig.isEstimator())
+ //forward to estimator
+ LineageEstimator.processFunc(outputs, liInputs, name,
ec, computetime);
+
if (!LineageCacheConfig.isMultiLevelReuse())
return;
diff --git
a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
index 3b8ee07..0b22b28 100644
--- a/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageCacheConfig.java
@@ -40,7 +40,7 @@ public class LineageCacheConfig
"uamean", "max", "min", "ifelse", "-", "sqrt", ">", "uak+",
"<=",
"^", "uamax", "uark+", "uacmean", "eigen", "ctableexpand",
"replace",
"^2", "uack+", "tak+*", "uacsqk+", "uark+", "n+", "uarimax",
"qsort",
- "qpick", "transformapply", "uarmax", "n+"
+ "qpick", "transformapply", "uarmax", "n+", "-*", "castdtm"
//TODO: Reuse everything.
};
private static String[] REUSE_OPCODES = new String[] {};
@@ -70,6 +70,7 @@ public class LineageCacheConfig
private static CachedItemHead _itemH = null;
private static CachedItemTail _itemT = null;
private static boolean _compilerAssistedRW = false;
+ private static boolean _onlyEstimate = false;
//-------------DISK SPILLING RELATED CONFIGURATIONS--------------//
@@ -282,6 +283,14 @@ public class LineageCacheConfig
return _cachepolicy;
}
+ public static void setEstimator(boolean onlyEstimator) {
+ _onlyEstimate = onlyEstimator;
+ }
+
+ public static boolean isEstimator() {
+ return _onlyEstimate;
+ }
+
public static boolean isTimeBased() {
// Check the LRU component of weights array.
return (WEIGHTS[1] > 0);
diff --git
a/src/main/java/org/apache/sysds/runtime/lineage/LineageEstimator.java
b/src/main/java/org/apache/sysds/runtime/lineage/LineageEstimator.java
new file mode 100644
index 0000000..2ac9aa5
--- /dev/null
+++ b/src/main/java/org/apache/sysds/runtime/lineage/LineageEstimator.java
@@ -0,0 +1,219 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.lineage;
+
+import java.util.Comparator;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+
+import org.apache.commons.lang3.tuple.MutableTriple;
+import org.apache.sysds.api.DMLScript;
+import org.apache.sysds.parser.DataIdentifier;
+import org.apache.sysds.runtime.controlprogram.caching.MatrixObject;
+import org.apache.sysds.runtime.controlprogram.context.ExecutionContext;
+import
org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer;
+import org.apache.sysds.runtime.instructions.Instruction;
+import org.apache.sysds.runtime.instructions.cp.ComputationCPInstruction;
+import org.apache.sysds.runtime.instructions.cp.Data;
+import org.apache.sysds.runtime.instructions.cp.ScalarObject;
+
+public class LineageEstimator
+{
+ private static final Map<LineageItem, LineageEstimatorEntry> _cache =
new HashMap<>();
+ private static final Map<String, MutableTriple<String, Long, Double>>
_savedPerOP = new HashMap<>();
+ protected static long _startTimestamp;
+ protected static long _cachesize = 0;
+ protected static long _cacheFullCount = 0;
+ protected static long _totReusableSize = 0;
+ protected static long _totReusedSize = 0;
+ private static final double CACHE_FRAC = 0.05; //5% of JVM
+ protected static long CACHE_LIMIT;
+ private static Comparator<MutableTriple<String, Long, Double>>
savedOPComparator = (op1, op2) -> {
+ return op1.getRight() == op2.getRight() ? 0 : op1.getRight() <
op2.getRight() ? 1 : -1;
+ };
+ protected static PriorityQueue<MutableTriple<String, Long, Double>>
computeSavingInst = new PriorityQueue<>(savedOPComparator);
+
+ static {
+ long maxMem = InfrastructureAnalyzer.getLocalMaxMemory();
+ CACHE_LIMIT = ((long)(CACHE_FRAC * maxMem));
+ _startTimestamp = System.currentTimeMillis();
+ }
+ // Collected statistics descriptions:
+ // _cache = Captures all lineage traceable CP instructions
and functions
+ // computeSavingInst = A priority queue which captures reuse count and
saved computetime
+ // group by opcodes, order by saved computetime
+ // _cachesize = Total size of all entries saved in _cache
+ // _totReusableSize = Total size of all intermediates which are
reusable as per
+ // original lineage caching logic.
+ // _totReusedSize = Total size of all intermediates which are reused
and
+ // are reusable as per original lineage caching
logic.
+ // _cacheFullCount = Number of lineage traceable instructions since
the beginning
+ // of execution that fill up the cache
+
+ // TODO: handling of parfor, statementblock reuse
+ // TODO: collect lineage tracing and probing overhead (computation)
+
+ public static void processSingleInst(Instruction inst, ExecutionContext
ec, long starttime)
+ {
+ // Called for every lineage tracable instruction
+ if (!(inst instanceof ComputationCPInstruction))
+ return;
+ long computetime = System.nanoTime() - starttime;
+ ComputationCPInstruction cinst = (ComputationCPInstruction)
inst;
+ LineageItem li = cinst.getLineageItem(ec).getValue();
+ boolean isReusable = LineageCacheConfig.isReusable(inst, ec);
+ // Gather the size of this intermediate
+ Data data = ec.getVariable(((ComputationCPInstruction)
inst).output);
+ long datasize = 0;
+ if (data instanceof MatrixObject)
+ datasize =
(((MatrixObject)data).acquireReadAndRelease()).getInMemorySize();
+ else if (data instanceof ScalarObject)
+ datasize = ((ScalarObject)data).getSize();
+ else
+ return; // must be a frame
+ probePutValue(li, computetime, datasize, isReusable);
+ }
+
+ public static void stopEstimator(List<DataIdentifier> outputs,
LineageItem[]liInputs, String name) {
+ // This is called for functions, before execution of the
function body starts.
+ // To simulate multilevel reuse, stop gathering statistics for
the body
+ // if the function entry is available in the cache.
+ boolean allOutputsCached = true;
+ for (int i=0; i<outputs.size(); i++) {
+ String opcode = name + "%" + String.valueOf(i+1);
+ LineageItem li = new LineageItem(opcode, liInputs);
+ if (!_cache.containsKey(li))
+ allOutputsCached = false;
+ }
+ if (allOutputsCached)
+ DMLScript.LINEAGE_ESTIMATE = false;
+ }
+
+ public static void processFunc(List<DataIdentifier> outputs,
+ LineageItem[] liInputs, String name, ExecutionContext ec, long
computetime)
+ {
+ // This is called for functions, after end of execution.
+ // Restart stat collection and save the function entry.
+ DMLScript.LINEAGE_ESTIMATE = true;
+ for (int i=0; i<outputs.size(); i++) {
+ String opcode = name + "%" + String.valueOf(i+1);
+ LineageItem li = new LineageItem(opcode, liInputs);
+ String boundVarName = outputs.get(i).getName();
+ LineageItem boundLI = ec.getLineage().get(boundVarName);
+ if (boundLI != null)
+ boundLI.resetVisitStatusNR();
+ if (boundLI != null) {
+ Data data = ec.getVariable(boundVarName);
+ long datasize = 0;
+ if (data instanceof MatrixObject)
+ datasize =
(((MatrixObject)data).acquireReadAndRelease()).getInMemorySize();
+ else if (data instanceof ScalarObject)
+ datasize =
((ScalarObject)data).getSize();
+ else
+ return; // must be a frame
+ probePutValue(li, computetime, datasize, true);
+ }
+ }
+ }
+
+ private static void probePutValue(LineageItem li, long computetime,
long datasize, boolean isReusable) {
+ // Probe the estimator cache
+ if (_cache.containsKey(li)) {
+ LineageEstimatorEntry ee = _cache.get(li);
+
LineageEstimatorStatistics.incrementSavedComputeTime(ee.computeTime);
+ // Update reused size only once per entry
+ if (isReusable && ee.reuseCount == 0)
+ _totReusedSize += ee.memsize;
+ // Update reusecont and total time saved for this
lineage item.
+ ee.updateStats();
+ if (_savedPerOP.containsKey(getOpcode(li))) {
+ MutableTriple<String, Long, Double> op =
_savedPerOP.get(getOpcode(li));
+ computeSavingInst.remove(op);
+ // Update saved compute time, reuse count for
this operator in the queue
+ op.setRight(op.getRight() +
ee.computeTime*1e-6);
+ op.setMiddle(op.getMiddle() + 1);
+ computeSavingInst.add(op);
+ }
+ return;
+ }
+
+ // Put the entry in the estimator cache
+ LineageEstimatorEntry ee = new LineageEstimatorEntry(li,
computetime, datasize);
+ _cache.put(li, ee);
+ _cachesize += datasize;
+ if (!_savedPerOP.containsKey(getOpcode(li))) {
+ _savedPerOP.put(getOpcode(li),
MutableTriple.of(getOpcode(li), 0L, 0.0));
+ computeSavingInst.add(MutableTriple.of(getOpcode(li),
0L, 0.0));
+ }
+
+ if (isReusable)
+ // Update cacheable size only if this entry is reusable
as per
+ // the original lineage caching logic.
+ _totReusableSize += ee.memsize;
+
+ // Mark the count of instructions if cache is full
+ if (_cacheFullCount == 0 && _cachesize >= CACHE_LIMIT)
+ _cacheFullCount = _cache.size();
+ }
+
+ private static String getOpcode(LineageItem li) {
+ String opcode = li.getOpcode();
+ if (opcode.indexOf("%") == -1)
+ return opcode;
+ return opcode.substring(0, opcode.indexOf("%"));
+ }
+
+ public static int computeCacheFullTime() {
+ double d = ((double)_cacheFullCount/_cache.size())*100;
+ return (int)d;
+ }
+
+ public static void resetEstimatorCache() {
+ _cache.clear();
+ _savedPerOP.clear();
+ _cachesize = 0;
+ _cacheFullCount = 0;
+ _totReusableSize = 0;
+ _totReusedSize = 0;
+ }
+}
+
+
+class LineageEstimatorEntry {
+ protected LineageItem key;
+ protected long computeTime;
+ protected long memsize;
+ protected long timestamp;
+ protected long reuseCount = 0;
+ protected double totTimeSaved = 0; //compute time scaled by reuse count
+
+ public LineageEstimatorEntry(LineageItem li, long ct, long size) {
+ key = li;
+ timestamp = System.currentTimeMillis() -
LineageEstimator._startTimestamp;
+ computeTime = ct;
+ memsize = size;
+ }
+ public void updateStats() {
+ reuseCount++;
+ totTimeSaved = computeTime*1e-6 * reuseCount; //in ms
+ }
+}
diff --git
a/src/main/java/org/apache/sysds/runtime/lineage/LineageEstimatorStatistics.java
b/src/main/java/org/apache/sysds/runtime/lineage/LineageEstimatorStatistics.java
new file mode 100644
index 0000000..f6db467
--- /dev/null
+++
b/src/main/java/org/apache/sysds/runtime/lineage/LineageEstimatorStatistics.java
@@ -0,0 +1,87 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.lineage;
+
+import java.util.concurrent.atomic.LongAdder;
+
+import org.apache.commons.lang3.tuple.MutableTriple;
+import org.apache.sysds.utils.Statistics;
+
+public class LineageEstimatorStatistics {
+ private static final LongAdder _ctimeSaved = new LongAdder(); //in
nano sec
+ private static int maxInsts = 10;
+
+ public static void reset() {
+ _ctimeSaved.reset();
+ }
+
+ public static void incrementSavedComputeTime(long delta) {
+ // Total time saved by reusing.
+ // TODO: Handle overflow
+ _ctimeSaved.add(delta);
+ }
+
+ public static String displayComputeTime() {
+ StringBuilder sb = new StringBuilder();
+ sb.append(String.format("%.3f",
((double)Statistics.getRunTime())*1e-9)); //in sec
+ sb.append("/");
+ sb.append(String.format("%.3f",
((double)_ctimeSaved.longValue())/1000000000)); //in sec
+ return sb.toString();
+ }
+
+ public static String displaySize() {
+ //size of all cached reusable intermediates/size of reused
intermediates//cache size
+ StringBuilder sb = new StringBuilder();
+ sb.append(String.format("%.3f",
((double)LineageEstimator._totReusableSize)/(1024*1024))); //in MB
+ sb.append("/");
+ sb.append(String.format("%.3f",
((double)LineageEstimator._totReusedSize)/(1024*1024))); //in MB
+ sb.append("/");
+ sb.append(String.format("%.3f",
((double)LineageEstimator.CACHE_LIMIT)/(1024*1024))); //in MB
+ return sb.toString();
+ }
+
+ public static String displayReusableInsts() {
+ // Total time saved and reuse counts per opcode, ordered by
saved time
+ StringBuilder sb = new StringBuilder();
+ sb.append("# Instrunction\t" + " "+"Time(s) Count \n");
+ for (int i=1; i<=maxInsts; i++) {
+ MutableTriple<String, Long, Double> op =
LineageEstimator.computeSavingInst.poll();
+ int tl =
String.valueOf(op.getRight()*1e-3).indexOf(".");
+ if (op != null && op.getRight() > 0)
+ sb.append(String.valueOf(i)
+ +
String.format("%"+(4-String.valueOf(i).length())+"s", "") // 4-length(i) spaces
+ + op.getLeft()
+ +
String.format("%"+(15-op.getLeft().length())+"s", "") // 15 - length(opcode)
spaces
+ + String.format("%.3f",
op.getRight()*1e-3)
+ + String.format("%"+(8-(tl+3))+"s", "")
// 8 - length(time upto '.') spaces
+ + op.getMiddle()+ "\n");
+ }
+ return sb.toString();
+ }
+
+ public static String displayLineageEstimates() {
+ StringBuilder sb = new StringBuilder();
+ sb.append("Compute Time (Elapsed/Saved): \t" +
displayComputeTime() + " sec.\n");
+ sb.append("Space Used (C/R/L): \t\t" + displaySize() + "
MB.\n"); // total cached/reused/cache limit
+ sb.append("Cache Full Timestamp: \t\t" +
LineageEstimator.computeCacheFullTime() + "% instructions.\n");
+ sb.append(displayReusableInsts());
+ return sb.toString();
+ }
+}