This is an automated email from the ASF dual-hosted git repository.

baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git


The following commit(s) were added to refs/heads/main by this push:
     new 43d512cce4 [SYSTEMDS-3436] CLA ArrayOutOfBounds in sample
43d512cce4 is described below

commit 43d512cce40713d81b1d8840bde705edee202775
Author: baunsgaard <[email protected]>
AuthorDate: Tue Sep 13 11:07:10 2022 +0200

    [SYSTEMDS-3436] CLA ArrayOutOfBounds in sample
    
    This commit fixes a minor bug in the compression framework where
    fully dense columns in a sparse block were processed incorrectly,
    always calculating one distinct value too many.
    
    Also contained are minor cleanups in the compression framework and
    more tests to cover this edge case, along with fixes for other cases
    found -- for instance, sparse columns containing values at the same
    rows were not handled correctly in all cases.
    
    All bugs found are minor and do not affect the overall compression.
    
    Closes #1694
---
 .../colgroup/insertionsort/MaterializeSort.java    |  18 +-
 .../compress/colgroup/mapping/AMapToData.java      |  23 +-
 .../compress/cost/ComputationCostEstimator.java    |  84 ++---
 .../compress/cost/CostEstimatorBuilder.java        |  31 +-
 .../compress/cost/CostEstimatorFactory.java        |  15 +-
 .../runtime/compress/cost/HybridCostEstimator.java |   4 +-
 .../compress/cost/InstructionTypeCounter.java      | 117 ++++---
 .../sysds/runtime/compress/estim/AComEst.java      |   5 +
 .../sysds/runtime/compress/estim/ComEstExact.java  |   5 +-
 .../sysds/runtime/compress/estim/ComEstSample.java |  35 +-
 .../runtime/compress/estim/EstimationFactors.java  |   4 +
 .../compress/estim/encoding/DenseEncoding.java     |  37 +--
 .../{IEncode.java => EncodingFactory.java}         | 110 +++---
 .../runtime/compress/estim/encoding/IEncode.java   | 321 +-----------------
 .../compress/estim/encoding/SparseEncoding.java    |  51 +--
 .../estim/sample/SampleEstimatorFactory.java       |   5 +-
 .../compress/estim/sample/ShlosserEstimator.java   |   3 -
 .../component/compress/CompressedTestBase.java     |   2 +-
 .../component/compress/colgroup/ColGroupTest.java  |   6 +-
 .../component/compress/cost/ComputeCostTest.java   |  24 +-
 .../compress/cost/InstructionCounterTest.java      | 367 +++++++++++++++++++++
 .../compress/estim/SampleEstimatorTest.java        |  13 +-
 .../estim/encoding/EncodeNegativeTest.java         |  99 ++++++
 .../estim/encoding/EncodeSampleCustom.java         |  26 --
 .../estim/encoding/EncodeSampleSingleColTest.java  |  13 +-
 .../compress/estim/encoding/EncodeSampleTest.java  |  43 ++-
 .../estim/encoding/EncodeSampleUnbalancedTest.java |  46 ++-
 .../estim/encoding/EncodeSampleUniformTest.java    |   7 +-
 .../estim/sample/SampleDistinctNegativeTest.java   |  63 ++++
 .../estim/{ => sample}/SampleDistinctTest.java     |   3 +-
 .../estim/sample/ShlosserEstimatorTest.java        |  89 +++++
 .../component/compress/mapping/MappingTests.java   |   9 +-
 .../compress/mapping/MappingTestsResize.java       |  18 +-
 .../component/compress/offset/OffsetTests.java     |   9 +
 .../federated/FedWorkerMatrixMultiplyWorkload.java |   2 +-
 35 files changed, 1034 insertions(+), 673 deletions(-)

diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/insertionsort/MaterializeSort.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/insertionsort/MaterializeSort.java
index 6052aea46a..6b1a7745a6 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/insertionsort/MaterializeSort.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/insertionsort/MaterializeSort.java
@@ -19,7 +19,6 @@
 
 package org.apache.sysds.runtime.compress.colgroup.insertionsort;
 
-import org.apache.sysds.runtime.compress.DMLCompressionException;
 import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
 import org.apache.sysds.runtime.compress.utils.IntArrayList;
@@ -54,20 +53,9 @@ public class MaterializeSort extends AInsertionSorter {
        }
 
        private void insert(int rl, int ru) {
-               try {
-                       md.fill(_numLabels);
-                       materializeInsert(rl, ru);
-                       filterInsert(rl, ru);
-               }
-               catch(Exception e) {
-                       int sum = 0;
-                       for(IntArrayList o : _offsets)
-                               sum += o.size();
-                       throw new DMLCompressionException(
-                               "Failed normal materialize sorting with list of 
" + _offsets.length + " with sum (aka output size): " + sum
-                                       + " requested Size: " + _indexes.length 
+ " range: " + rl + " " + ru,
-                               e);
-               }
+               md.fill(_numLabels);
+               materializeInsert(rl, ru);
+               filterInsert(rl, ru);
        }
 
        private void materializeInsert(int rl, int ru) {
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
index b2f1283055..45d09f7c6e 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/mapping/AMapToData.java
@@ -366,22 +366,31 @@ public abstract class AMapToData implements Serializable {
                }
        }
 
+       /**
+        * Get the number of counts of each unique value contained in this map. 
Note that in the case the mapping is shorter
+        * than number of rows the counts sum to the number of mapped values 
not the number of rows.
+        * 
+        * @return The counts
+        */
+       public final int[] getCounts() {
+               return getCounts(new int[getUnique()]);
+       }
+
        /**
         * Get the number of counts of each unique value contained in this map. 
Note that in the case the mapping is shorter
         * than number of rows the counts sum to the number of mapped values 
not the number of rows.
         * 
         * @param counts The object to return.
-        * @return the Counts
+        * @return The counts
         */
        public final int[] getCounts(int[] counts) {
                count(counts);
 
-               if(counts[counts.length - 1] == 0) {
-                       int actualUnique = counts.length;
-                       for(; actualUnique > 1; actualUnique--) {
-                               if(counts[actualUnique - 1] > 0)
-                                       break;
-                       }
+               if(counts[counts.length - 1] == 0 || counts[0] == 0) { // small 
check for first and last index.
+                       int actualUnique = 0;
+                       for(int c : counts)
+                               actualUnique += c > 0 ? 1 : 0;
+
                        throw new DMLCompressionException("Invalid number 
unique expected: " + counts.length + " but is actually: "
                                + actualUnique + " type: " + getType());
                }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
 
b/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
index a92e0ecf7a..639859d1b9 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/cost/ComputationCostEstimator.java
@@ -31,51 +31,21 @@ public class ComputationCostEstimator extends ACostEstimate 
{
        /** The threshold before the commonValueImpact is starting. */
        private static final double cvThreshold = 0.2;
 
-       /** Number of scans through the column groups (aka. rowSums for 
instance) */
-       private final int _scans;
-       /** Number of decompressions of this column group directly (aka 
decompress to same size output) */
-       private final int _decompressions;
-       /** Number of operations that only modify underlying dictionary */
-       private final int _dictionaryOps;
-
-       // The following counters are different in they count the operation 
size not the number of instances.
-       /** Total number of columns to decompress overlapping into */
-       private final int _overlappingDecompressions;
-       /** Total number of left side rows to multiply with */
-       private final int _leftMultiplications;
-       /** Total number of right side columns to multiply with */
-       private final int _rightMultiplications;
-       /** Total number of left column groups to compressed multiply with, 
taken as worst case meaning number of rows */
-       private final int _compressedMultiplication;
-
-       /** Boolean specifying if the matrix is getting densified, meaning 
exploiting zeros is gone. */
-       private final boolean _isDensifying;
+       private final InstructionTypeCounter ins;
 
        protected ComputationCostEstimator(InstructionTypeCounter counts) {
-               _scans = counts.scans;
-               _decompressions = counts.decompressions;
-               _overlappingDecompressions = counts.overlappingDecompressions;
-               _leftMultiplications = counts.leftMultiplications;
-               _rightMultiplications = counts.rightMultiplications;
-               _compressedMultiplication = counts.compressedMultiplications;
-               _dictionaryOps = counts.dictionaryOps;
-               _isDensifying = counts.isDensifying;
-               // _rowBasedOps = counts.rowBasedOps;
+               this.ins = counts;
+
                if(LOG.isDebugEnabled())
                        LOG.debug(this);
        }
 
        public ComputationCostEstimator(int scans, int decompressions, int 
overlappingDecompressions,
-               int leftMultiplications, int rightMultiplications, int 
compressedMultiplication, int dictOps,
+               int leftMultiplications, int rightMultiplications, int 
compressedMultiplication, int dictOps, int indexing,
                boolean isDensifying) {
-               _scans = scans;
-               _decompressions = decompressions;
-               _overlappingDecompressions = overlappingDecompressions;
-               _leftMultiplications = leftMultiplications;
-               _rightMultiplications = rightMultiplications;
-               _compressedMultiplication = compressedMultiplication;
-               _dictionaryOps = dictOps;
-               _isDensifying = isDensifying;
+               ins = new InstructionTypeCounter(scans, decompressions, 
overlappingDecompressions, leftMultiplications,
+                       rightMultiplications, compressedMultiplication, 
dictOps, indexing, isDensifying);
+
        }
 
        @Override
@@ -84,11 +54,11 @@ public class ComputationCostEstimator extends ACostEstimate 
{
                final int nCols = g.getColumns().length;
                final int nRows = g.getNumRows();
                // assume that it is never fully sparse
-               final double sparsity = (nCols < 3 || _isDensifying) ? 1 : 
g.getTupleSparsity() + 1E-10;
+               final double sparsity = (nCols < 3 || ins.isDensifying()) ? 1 : 
g.getTupleSparsity() + 1E-10;
 
                final double commonFraction = g.getLargestOffInstances();
 
-               if(g.isEmpty() && !_isDensifying)
+               if(g.isEmpty() && !ins.isDensifying())
                        // set some small cost to empty
                        return getCost(nRows, 1, nCols, 1, 0.00001);
                else if(g.isEmpty() || g.isConst())
@@ -128,7 +98,7 @@ public class ComputationCostEstimator extends ACostEstimate {
        }
 
        public boolean isDense() {
-               return _isDensifying;
+               return ins.isDensifying();
        }
 
        @Override
@@ -136,7 +106,7 @@ public class ComputationCostEstimator extends ACostEstimate 
{
                double cost = 0;
                final double nCols = mb.getNumColumns();
                final double nRows = mb.getNumRows();
-               final double sparsity = (nCols < 3 || _isDensifying) ? 1 : 
mb.getSparsity();
+               final double sparsity = (nCols < 3 || ins.isDensifying()) ? 1 : 
mb.getSparsity();
 
                cost += dictionaryOpsCost(nRows, nCols, sparsity);
                // Naive number of floating point multiplications
@@ -160,14 +130,15 @@ public class ComputationCostEstimator extends 
ACostEstimate {
 
        @Override
        public boolean shouldSparsify() {
-               return _leftMultiplications > 0 || _compressedMultiplication > 
0 || _rightMultiplications > 0;
+               return ins.getLeftMultiplications() > 0 || 
ins.getCompressedMultiplications() > 0 ||
+                       ins.getRightMultiplications() > 0;
        }
 
        private double dictionaryOpsCost(double nVals, double nCols, double 
sparsity) {
                // Dictionary ops simply goes through dictionary and modify all 
values.
                // Therefore the cost is in number of cells in the dictionary.
                // * 2 because we allocate a output of same size at least
-               return _dictionaryOps * sparsity * nVals * nCols * 2;
+               return ins.getDictionaryOps() * sparsity * nVals * nCols * 2;
        }
 
        private double leftMultCost(double nRowsScanned, double nRows, double 
nCols, double nVals, double sparsity) {
@@ -178,7 +149,7 @@ public class ComputationCostEstimator extends ACostEstimate 
{
        }
 
        private double leftMultCost(double preAggregateCost, double 
postScalingCost) {
-               return _leftMultiplications * (preAggregateCost + 
postScalingCost);
+               return ins.getLeftMultiplications() * (preAggregateCost + 
postScalingCost);
        }
 
        private double rightMultCost(double nVals, double nCols, double 
sparsity) {
@@ -188,40 +159,33 @@ public class ComputationCostEstimator extends 
ACostEstimate {
        }
 
        private double rightMultCost(double preMultiplicationCost, double 
allocationCost) {
-               return _rightMultiplications * (preMultiplicationCost + 
allocationCost);
+               return ins.getRightMultiplications() * (preMultiplicationCost + 
allocationCost);
        }
 
        private double decompressionCost(double nVals, double nCols, double 
nRowsScanned, double sparsity) {
-               return _decompressions * (nCols * nRowsScanned * sparsity);
+               return ins.getDecompressions() * (nCols * nRowsScanned * 
sparsity);
        }
 
        private double overlappingDecompressionCost(double nRows) {
-               return _overlappingDecompressions * nRows;
+               return ins.getOverlappingDecompressions() * nRows;
        }
 
        private double scanCost(double nRowsScanned, double nVals, double 
nCols, double sparsity) {
-               return _scans * (nRowsScanned + nVals * nCols * sparsity);
+               return ins.getScans() * (nRowsScanned + nVals * nCols * 
sparsity);
        }
 
-       private double compressedMultiplicationCost(double nRowsScanned, double 
nRows, double nVals, double nCols, double sparsity) {
+       private double compressedMultiplicationCost(double nRowsScanned, double 
nRows, double nVals, double nCols,
+               double sparsity) {
                // return _compressedMultiplication * Math.max(nRowsScanned * 
nCols ,nVals * nCols * sparsity );
-               return _compressedMultiplication * (Math.max(nRowsScanned, 
nRows / 10) + nVals * nCols * sparsity);
+               return ins.getCompressedMultiplications() * 
(Math.max(nRowsScanned, nRows / 10) + nVals * nCols * sparsity);
        }
 
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
                sb.append(super.toString());
-               sb.append(" --- CostVector:[");
-               sb.append(_scans + ",");
-               sb.append(_decompressions + ",");
-               sb.append(_overlappingDecompressions + ",");
-               sb.append(_leftMultiplications + ",");
-               sb.append(_rightMultiplications + ",");
-               sb.append(_compressedMultiplication + ",");
-               sb.append(_dictionaryOps + "]");
-               sb.append(" Densifying:");
-               sb.append(_isDensifying);
+               sb.append(" : ");
+               sb.append(ins.toString());
                return sb.toString();
        }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/cost/CostEstimatorBuilder.java
 
b/src/main/java/org/apache/sysds/runtime/compress/cost/CostEstimatorBuilder.java
index 2a9bdac1b2..d039e092ee 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/cost/CostEstimatorBuilder.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/cost/CostEstimatorBuilder.java
@@ -43,7 +43,7 @@ public final class CostEstimatorBuilder implements 
Serializable {
        public CostEstimatorBuilder(WTreeRoot root) {
                counter = new InstructionTypeCounter();
                if(root.isDecompressing())
-                       counter.decompressions++;
+                       counter.incDecompressions();
                for(Op o : root.getOps())
                        addOp(1, o, counter);
                for(WTreeNode n : root.getChildNodes())
@@ -77,23 +77,23 @@ public final class CostEstimatorBuilder implements 
Serializable {
        private static void addOp(int count, Op o, InstructionTypeCounter 
counter) {
                if(o.isDecompressing()) {
                        if(o.isOverlapping())
-                               counter.overlappingDecompressions += count * 
o.dim();
+                               counter.incOverlappingDecompressions(count * 
o.dim());
                        else
-                               counter.decompressions += count;
+                               counter.incDecompressions(count);
                }
                if(o.isDensifying()) {
-                       counter.isDensifying = true;
+                       counter.setDensifying(true);
                }
 
                if(o instanceof OpSided) {
                        OpSided os = (OpSided) o;
                        final int d = o.dim();
                        if(os.isLeftMM())
-                               counter.leftMultiplications += count * d;
+                               counter.incLMM(count * d);
                        else if(os.isRightMM())
-                               counter.rightMultiplications += count * d;
+                               counter.incRMM(count * d);
                        else
-                               counter.compressedMultiplications += count * d;
+                               counter.incCMM(count * d);
                }
                else if(o instanceof OpMetadata) {
                        // ignore it
@@ -105,29 +105,30 @@ public final class CostEstimatorBuilder implements 
Serializable {
 
                                switch(agop.getDirection()) {
                                        case Row:
-                                               counter.scans += count;
+                                               counter.incScans(count);
                                                break;
                                        default:
-                                               counter.dictionaryOps += count;
+                                               counter.incDictOps(count);
                                }
                        }
                        else if(h instanceof IndexingOp) {
                                IndexingOp idxO = (IndexingOp) h;
                                if(idxO.isRowLowerEqualsUpper() && 
idxO.isColLowerEqualsUpper())
-                                       counter.indexing++;
+                                       counter.incIndexOp(count);
                                else if(idxO.isAllRows())
-                                       counter.dictionaryOps += count; // 
Technically not correct but better than decompression
+                                       // Technically not correct but better 
than decompression
+                                       counter.incDictOps(count);
                        }
                        else
-                               counter.dictionaryOps += count;
+                               counter.incDictOps(count);
                }
        }
 
        public boolean shouldTryToCompress() {
                int numberOps = 0;
-               numberOps += counter.scans + counter.leftMultiplications + 
counter.rightMultiplications +
-                       counter.compressedMultiplications + 
counter.dictionaryOps;
-               numberOps -= counter.decompressions + 
counter.overlappingDecompressions;
+               numberOps += counter.getScans()+ 
counter.getLeftMultiplications() + counter.getRightMultiplications() +
+                       counter.getCompressedMultiplications() + 
counter.getDictionaryOps();
+               numberOps -= counter.getDecompressions() + 
counter.getOverlappingDecompressions();
                return numberOps > 4;
        }
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/cost/CostEstimatorFactory.java
 
b/src/main/java/org/apache/sysds/runtime/compress/cost/CostEstimatorFactory.java
index 34717668a0..f03370af03 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/cost/CostEstimatorFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/cost/CostEstimatorFactory.java
@@ -21,12 +21,25 @@ package org.apache.sysds.runtime.compress.cost;
 
 import org.apache.sysds.runtime.compress.CompressionSettings;
 
-public final class CostEstimatorFactory {
+/**
+ * Factory class for the construction of cost estimators for compression
+ */
+public interface CostEstimatorFactory {
 
        public enum CostType {
                MEMORY, W_TREE, HYBRID_W_TREE, DISTINCT, AUTO;
        }
 
+       /**
+        * Create a cost estimator to enable comparison of different suggested 
compression formats
+        * 
+        * @param cs          The compression settings to use.
+        * @param costBuilder A cost builder to build the specific specialized 
formats of cost estimators
+        * @param nRows       The number of rows in a given input to compress
+        * @param nCols       The number of columns in a given input to compress
+        * @param sparsity    The sparsity of the input to compress
+        * @return A cost estimator
+        */
        public static ACostEstimate create(CompressionSettings cs, 
CostEstimatorBuilder costBuilder, int nRows, int nCols,
                double sparsity) {
                switch(cs.costComputationType) {
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/cost/HybridCostEstimator.java 
b/src/main/java/org/apache/sysds/runtime/compress/cost/HybridCostEstimator.java
index 7eb43015bf..db5233083f 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/cost/HybridCostEstimator.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/cost/HybridCostEstimator.java
@@ -37,9 +37,9 @@ public class HybridCostEstimator extends ACostEstimate {
        }
 
        protected HybridCostEstimator(int scans, int decompressions, int 
overlappingDecompressions, int leftMultiplications,
-               int compressedMultiplication, int rightMultiplications, int 
dictionaryOps, boolean isDensifying) {
+               int compressedMultiplication, int rightMultiplications, int 
dictionaryOps, int indexing, boolean isDensifying) {
                costEstimator = new ComputationCostEstimator(scans, 
decompressions, overlappingDecompressions,
-                       leftMultiplications, compressedMultiplication, 
rightMultiplications, dictionaryOps, isDensifying);
+                       leftMultiplications, compressedMultiplication, 
rightMultiplications, dictionaryOps, indexing, isDensifying);
                memoryCostEstimator = new MemoryCostEstimator();
        }
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/cost/InstructionTypeCounter.java
 
b/src/main/java/org/apache/sysds/runtime/compress/cost/InstructionTypeCounter.java
index 2322ae94a9..1b77526086 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/cost/InstructionTypeCounter.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/cost/InstructionTypeCounter.java
@@ -25,25 +25,31 @@ public final class InstructionTypeCounter implements 
Serializable {
 
        private static final long serialVersionUID = 115L;
 
-       protected int total = 0;
-
-       protected int scans = 0;
-       protected int decompressions = 0;
-       protected int overlappingDecompressions = 0;
-       protected int leftMultiplications = 0;
-       protected int rightMultiplications = 0;
-       protected int compressedMultiplications = 0;
-       protected int dictionaryOps = 0; // base cost is one pass of dictionary
-       protected int indexing = 0;
-       protected boolean isDensifying = false;
+       /** Number of scans through the column groups (aka. rowSums for 
instance) */
+       private int scans = 0;
+       /** Number of decompressions of this column group directly (aka 
decompress to same size output) */
+       private int decompressions = 0;
+       /** Total number of columns to decompress overlapping into */
+       private int overlappingDecompressions = 0;
+       /** Total number of left side rows to multiply with */
+       private int leftMultiplications = 0;
+       /** Total number of right side columns to multiply with */
+       private int rightMultiplications = 0;
+       /** Total number of left column groups to compressed multiply with, 
taken as worst case meaning number of rows */
+       private int compressedMultiplications = 0;
+       /** Number of operations that only modify underlying dictionary */
+       private int dictionaryOps = 0;
+       /** Number of operations that scan through the entry index structure */
+       private int indexing = 0;
+       /** Boolean specifying if the matrix is getting densified, meaning 
exploiting zeros is gone. */
+       private boolean isDensifying = false;
 
        public InstructionTypeCounter() {
                // default no count.
        }
 
        public InstructionTypeCounter(int scans, int decompressions, int 
overlappingDecompressions, int leftMultiplications,
-               int rightMultiplications, int compressedMultiplications, int 
dictionaryOps, int indexing, int total,
-               boolean isDensifying) {
+               int rightMultiplications, int compressedMultiplications, int 
dictionaryOps, int indexing, boolean isDensifying) {
                this.scans = scans;
                this.decompressions = decompressions;
                this.overlappingDecompressions = overlappingDecompressions;
@@ -53,7 +59,6 @@ public final class InstructionTypeCounter implements 
Serializable {
                this.dictionaryOps = dictionaryOps;
                this.indexing = indexing;
                this.isDensifying = isDensifying;
-               this.total = total;
        }
 
        public int getScans() {
@@ -62,7 +67,10 @@ public final class InstructionTypeCounter implements 
Serializable {
 
        public void incScans() {
                scans++;
-               total++;
+       }
+
+       public void incScans(int c) {
+               scans += c;
        }
 
        public int getDecompressions() {
@@ -71,7 +79,10 @@ public final class InstructionTypeCounter implements 
Serializable {
 
        public void incDecompressions() {
                decompressions++;
-               total++;
+       }
+
+       public void incDecompressions(int c) {
+               decompressions += c;
        }
 
        public int getOverlappingDecompressions() {
@@ -80,7 +91,10 @@ public final class InstructionTypeCounter implements 
Serializable {
 
        public void incOverlappingDecompressions() {
                overlappingDecompressions++;
-               total++;
+       }
+
+       public void incOverlappingDecompressions(int c) {
+               overlappingDecompressions += c;
        }
 
        public int getLeftMultiplications() {
@@ -89,12 +103,10 @@ public final class InstructionTypeCounter implements 
Serializable {
 
        public void incLMM() {
                leftMultiplications++;
-               total++;
        }
 
        public void incLMM(int c) {
                leftMultiplications += c;
-               total++;
        }
 
        public int getRightMultiplications() {
@@ -103,12 +115,10 @@ public final class InstructionTypeCounter implements 
Serializable {
 
        public void incRMM() {
                rightMultiplications++;
-               total++;
        }
 
        public void incRMM(int c) {
                rightMultiplications += c;
-               total++;
        }
 
        public int getCompressedMultiplications() {
@@ -117,7 +127,10 @@ public final class InstructionTypeCounter implements 
Serializable {
 
        public void incCMM() {
                compressedMultiplications++;
-               total++;
+       }
+
+       public void incCMM(int c) {
+               compressedMultiplications += c;
        }
 
        public int getDictionaryOps() {
@@ -126,7 +139,10 @@ public final class InstructionTypeCounter implements 
Serializable {
 
        public void incDictOps() {
                dictionaryOps++;
-               total++;
+       }
+
+       public void incDictOps(int c) {
+               dictionaryOps += c;
        }
 
        public int getIndexing() {
@@ -135,43 +151,42 @@ public final class InstructionTypeCounter implements 
Serializable {
 
        public void incIndexOp() {
                indexing++;
-               total++;
        }
 
-       public static InstructionTypeCounter MMR(int nCols, int calls) {
-               return new InstructionTypeCounter(0, 0, 0, 0, nCols, 0, 0, 0, 
calls, false);
+       public void incIndexOp(int c) {
+               indexing += c;
        }
 
-       public static InstructionTypeCounter MML(int nRows, int calls) {
-               return new InstructionTypeCounter(0, 0, 0, nRows, 0, 0, 0, 0, 
calls, false);
+       public void setDensifying(boolean d) {
+               isDensifying = d;
+       }
+
+       public boolean isDensifying() {
+               return isDensifying;
        }
 
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
-               if(total > 1) {
-                       sb.append(String.format("Tot:%d;", total));
-                       if(scans > 0)
-                               sb.append(String.format("Sca:%d;", scans));
-                       if(decompressions > 0)
-                               sb.append(String.format("DeC:%d;", 
decompressions));
-                       if(overlappingDecompressions > 0)
-                               sb.append(String.format("OvD:%d;", 
overlappingDecompressions));
-                       if(leftMultiplications > 0)
-                               sb.append(String.format("LMM:%d;", 
leftMultiplications));
-                       if(rightMultiplications > 0)
-                               sb.append(String.format("RMM:%d;", 
rightMultiplications));
-                       if(compressedMultiplications > 0)
-                               sb.append(String.format("CMM:%d;", 
compressedMultiplications));
-                       if(dictionaryOps > 0)
-                               sb.append(String.format("dic:%d;", 
dictionaryOps));
-                       if(indexing > 0)
-                               sb.append(String.format("ind:%d;", indexing));
-                       if(sb.length() > 1)
-                               sb.setLength(sb.length() - 1); // remove last 
semicolon
-               }
-               else
-                       sb.append("Empty");
+
+               if(scans > 0)
+                       sb.append(String.format("Sca:%d;", scans));
+               if(decompressions > 0)
+                       sb.append(String.format("DeC:%d;", decompressions));
+               if(overlappingDecompressions > 0)
+                       sb.append(String.format("OvD:%d;", 
overlappingDecompressions));
+               if(leftMultiplications > 0)
+                       sb.append(String.format("LMM:%d;", 
leftMultiplications));
+               if(rightMultiplications > 0)
+                       sb.append(String.format("RMM:%d;", 
rightMultiplications));
+               if(compressedMultiplications > 0)
+                       sb.append(String.format("CMM:%d;", 
compressedMultiplications));
+               if(dictionaryOps > 0)
+                       sb.append(String.format("dic:%d;", dictionaryOps));
+               if(indexing > 0)
+                       sb.append(String.format("ind:%d;", indexing));
+               if(sb.length() > 1)
+                       sb.setLength(sb.length() - 1); // remove last semicolon
 
                return sb.toString();
        }
diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java 
b/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java
index 3fd0d1a970..4d7d65cacf 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/AComEst.java
@@ -291,4 +291,9 @@ public abstract class AComEst {
                        }
                }
        }
+
+       @Override
+       public String toString() {
+               return this.getClass().getSimpleName();
+       }
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/ComEstExact.java 
b/src/main/java/org/apache/sysds/runtime/compress/estim/ComEstExact.java
index 51a6164645..c7aac64cf4 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/ComEstExact.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/ComEstExact.java
@@ -21,6 +21,7 @@ package org.apache.sysds.runtime.compress.estim;
 
 import org.apache.sysds.runtime.compress.CompressionSettings;
 import org.apache.sysds.runtime.compress.estim.encoding.EmptyEncoding;
+import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 
@@ -35,7 +36,7 @@ public class ComEstExact extends AComEst {
 
        @Override
        public CompressedSizeInfoColGroup getColGroupInfo(int[] colIndexes, int 
estimate, int nrUniqueUpperBound) {
-               final IEncode map = IEncode.createFromMatrixBlock(_data, 
_cs.transposed, colIndexes);
+               final IEncode map = 
EncodingFactory.createFromMatrixBlock(_data, _cs.transposed, colIndexes);
                if(map instanceof EmptyEncoding)
                        return new CompressedSizeInfoColGroup(colIndexes, 
getNumRows());
                return getFacts(map, colIndexes);
@@ -43,7 +44,7 @@ public class ComEstExact extends AComEst {
 
        @Override
        public CompressedSizeInfoColGroup getDeltaColGroupInfo(int[] 
colIndexes, int estimate, int nrUniqueUpperBound) {
-               final IEncode map = IEncode.createFromMatrixBlockDelta(_data, 
_cs.transposed, colIndexes);
+               final IEncode map = 
EncodingFactory.createFromMatrixBlockDelta(_data, _cs.transposed, colIndexes);
                return getFacts(map, colIndexes);
        }
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/ComEstSample.java 
b/src/main/java/org/apache/sysds/runtime/compress/estim/ComEstSample.java
index e22dacc431..4713b54972 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/estim/ComEstSample.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/estim/ComEstSample.java
@@ -23,7 +23,7 @@ import java.util.Arrays;
 import java.util.Random;
 
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.DMLCompressionException;
+import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
 import org.apache.sysds.runtime.compress.estim.sample.SampleEstimatorFactory;
 import org.apache.sysds.runtime.controlprogram.parfor.stat.Timing;
@@ -78,7 +78,7 @@ public class ComEstSample extends AComEst {
                                _data.getSparseBlock().isEmpty(colIndexes[0])))
                        return new CompressedSizeInfoColGroup(colIndexes, 
getNumRows());
 
-               final IEncode map = IEncode.createFromMatrixBlock(_sample, 
_transposed, colIndexes);
+               final IEncode map = 
EncodingFactory.createFromMatrixBlock(_sample, _transposed, colIndexes);
                return extractInfo(map, colIndexes, maxDistinct);
        }
 
@@ -86,7 +86,7 @@ public class ComEstSample extends AComEst {
        public CompressedSizeInfoColGroup getDeltaColGroupInfo(int[] 
colIndexes, int estimate, int maxDistinct) {
                // Don't use sample when doing estimation of delta encoding, 
instead we read from the start of the matrix until
                // sample size. This guarantees that the delta values are 
actually represented in the full compression
-               final IEncode map = IEncode.createFromMatrixBlockDelta(_data, 
_transposed, colIndexes, _sampleSize);
+               final IEncode map = 
EncodingFactory.createFromMatrixBlockDelta(_data, _transposed, colIndexes, 
_sampleSize);
                return extractInfo(map, colIndexes, maxDistinct);
        }
 
@@ -132,26 +132,17 @@ public class ComEstSample extends AComEst {
                final double overallSparsity = calculateSparsity(colIndexes, 
nnz, scalingFactor, sampleFacts.overAllSparsity);
                // For robustness safety add 10 percent more tuple sparsity
                final double tupleSparsity = Math.min(overallSparsity * 1.3, 
1.0); // increase sparsity by 30%.
-               try {
-                       if(_cs.isRLEAllowed()) {
-                               final int scaledRuns = Math.max(estDistinct,
-                                       calculateRuns(sampleFacts, 
scalingFactor, numOffs, estDistinct));
-                               return new EstimationFactors(estDistinct, 
numOffs, mostFrequentOffsetCount, sampleFacts.frequencies,
-                                       sampleFacts.numSingle, numRows, 
scaledRuns, sampleFacts.lossy, sampleFacts.zeroIsMostFrequent,
-                                       overallSparsity, tupleSparsity);
-                       }
-                       else {
-                               return new EstimationFactors(estDistinct, 
numOffs, mostFrequentOffsetCount, sampleFacts.frequencies,
-                                       sampleFacts.numSingle, numRows, 
sampleFacts.lossy, sampleFacts.zeroIsMostFrequent, overallSparsity,
-                                       tupleSparsity);
-                       }
-               }
-               catch(Exception e) {
-                       throw new DMLCompressionException("Invalid construction 
of estimation factors with observed values:\n"
-                               + Arrays.toString(colIndexes) + " " + nnz + " " 
+ numOffs + "  " + estDistinct + "  "
-                               + maxLargestInstanceCount + "  " + 
scaledLargestInstanceCount + " " + mostFrequentOffsetCount + " "
-                               + overallSparsity + " " + tupleSparsity + "\n" 
+ nnzCols[colIndexes[0]], e);
+
+               if(_cs.isRLEAllowed()) {
+                       final int scaledRuns = Math.max(estDistinct, 
calculateRuns(sampleFacts, scalingFactor, numOffs, estDistinct));
+                       return new EstimationFactors(estDistinct, numOffs, 
mostFrequentOffsetCount, sampleFacts.frequencies,
+                               sampleFacts.numSingle, numRows, scaledRuns, 
sampleFacts.lossy, sampleFacts.zeroIsMostFrequent,
+                               overallSparsity, tupleSparsity);
                }
+               else
+                       return new EstimationFactors(estDistinct, numOffs, 
mostFrequentOffsetCount, sampleFacts.frequencies,
+                               sampleFacts.numSingle, numRows, 
sampleFacts.lossy, sampleFacts.zeroIsMostFrequent, overallSparsity,
+                               tupleSparsity);
 
        }
 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java 
b/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java
index d8da887bfd..005d79cd74 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java
@@ -93,6 +93,10 @@ public class EstimationFactors {
                        throw new DMLCompressionException("Num vals cannot be 
greater than num offs");
        }
 
+       public int[] getFrequencies(){
+               return frequencies;
+       }
+
        @Override
        public String toString() {
                StringBuilder sb = new StringBuilder();
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/DenseEncoding.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/DenseEncoding.java
index e29561018d..2a14c849ee 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/DenseEncoding.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/DenseEncoding.java
@@ -23,7 +23,6 @@ import java.util.HashMap;
 import java.util.Map;
 
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.DMLCompressionException;
 import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
 import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
@@ -35,6 +34,7 @@ public class DenseEncoding implements IEncode {
 
        public DenseEncoding(AMapToData map) {
                this.map = map;
+               map.getCounts();
        }
 
        @Override
@@ -107,29 +107,23 @@ public class DenseEncoding implements IEncode {
        }
 
        protected DenseEncoding combineDense(final DenseEncoding other) {
-               try {
+               if(map == other.map) // same object
+                       return this; // unlikely to happen but cheap to compute
 
-                       if(map == other.map) // same object
-                               return this; // unlikely to happen but cheap to 
compute
+               final AMapToData lm = map;
+               final AMapToData rm = other.map;
 
-                       final AMapToData lm = map;
-                       final AMapToData rm = other.map;
-
-                       final int nVL = lm.getUnique();
-                       final int nVR = rm.getUnique();
-                       final int size = map.size();
-                       final int maxUnique = nVL * nVR;
+               final int nVL = lm.getUnique();
+               final int nVR = rm.getUnique();
+               final int size = map.size();
+               final int maxUnique = nVL * nVR;
 
-                       final AMapToData ret = MapToFactory.create(size, 
maxUnique);
+               final AMapToData ret = MapToFactory.create(size, maxUnique);
 
-                       if(maxUnique > size)
-                               return combineDenseWithHashMap(lm, rm, size, 
nVL, ret);
-                       else
-                               return combineDenseWithMapToData(lm, rm, size, 
nVL, ret, maxUnique);
-               }
-               catch(Exception e) {
-                       throw new DMLCompressionException("Failed to combine 
two dense\n" + this + "\n" + other, e);
-               }
+               if(maxUnique > size)
+                       return combineDenseWithHashMap(lm, rm, size, nVL, ret);
+               else
+                       return combineDenseWithMapToData(lm, rm, size, nVL, 
ret, maxUnique);
        }
 
        protected final DenseEncoding combineDenseWithHashMap(final AMapToData 
lm, final AMapToData rm, final int size,
@@ -179,10 +173,11 @@ public class DenseEncoding implements IEncode {
                CompressionSettings cs) {
                int largestOffs = 0;
 
-               int[] counts = map.getCounts(new int[map.getUnique()]);
+               int[] counts = map.getCounts();
                for(int i = 0; i < counts.length; i++)
                        if(counts[i] > largestOffs)
                                largestOffs = counts[i];
+               
                if(cs.isRLEAllowed())
                        return new EstimationFactors(map.getUnique(), nRows, 
largestOffs, counts, 0, nRows, map.countRuns(), false,
                                false, matrixSparsity, tupleSparsity);
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/IEncode.java 
b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java
similarity index 81%
copy from 
src/main/java/org/apache/sysds/runtime/compress/estim/encoding/IEncode.java
copy to 
src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java
index f84e188b03..e246817582 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/IEncode.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/EncodingFactory.java
@@ -22,15 +22,10 @@ package org.apache.sysds.runtime.compress.estim.encoding;
 import java.util.Arrays;
 
 import org.apache.commons.lang.NotImplementedException;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.DMLCompressionException;
 import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
 import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
 import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
-import org.apache.sysds.runtime.compress.estim.EstimationFactors;
 import org.apache.sysds.runtime.compress.readers.ReaderColumnSelection;
 import org.apache.sysds.runtime.compress.utils.DblArray;
 import org.apache.sysds.runtime.compress.utils.DblArrayCountHashMap;
@@ -40,13 +35,16 @@ import org.apache.sysds.runtime.data.DenseBlock;
 import org.apache.sysds.runtime.data.SparseBlock;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 
-/**
- * This interface covers an intermediate encoding for the samples to improve 
the efficiency of the joining of sample
- * column groups.
- */
-public interface IEncode {
-       static final Log LOG = LogFactory.getLog(IEncode.class.getName());
+public interface EncodingFactory {
 
+       /**
+        * Encode a list of columns together from the input matrix, as if it is 
cocoded.
+        * 
+        * @param m          The matrix input to encode
+        * @param transposed If the matrix is transposed in memory
+        * @param rowCols    The list of columns to encode.
+        * @return An encoded format of the information of the columns.
+        */
        public static IEncode createFromMatrixBlock(MatrixBlock m, boolean 
transposed, int[] rowCols) {
                if(m.isEmpty())
                        return new EmptyEncoding();
@@ -56,14 +54,46 @@ public interface IEncode {
                        return createWithReader(m, rowCols, transposed);
        }
 
+       /**
+        * Encode a full delta representation of the matrix input taking all 
rows into account.
+        * 
+        * Note the input matrix should not be delta encoded; instead the delta is computed while processing, ensuring that we do not allocate
+        * more memory.
+        * 
+        * @param m          The input matrix that is not delta encoded and 
should not be modified
+        * @param transposed If the input matrix is transposed.
+        * @param rowCols    The list of columns to encode
+        * @return A delta encoded encoding.
+        */
        public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean 
transposed, int[] rowCols) {
-               return createFromMatrixBlockDelta(m, transposed, rowCols, 
transposed ? m.getNumColumns() : m.getNumRows());
+               final int sampleSize = transposed ? m.getNumColumns() : 
m.getNumRows();
+               return createFromMatrixBlockDelta(m, transposed, rowCols, 
sampleSize);
        }
 
-       public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean 
transposed, int[] rowCols, int nVals) {
+       /**
+        * Encode a delta representation of the matrix input taking the first 
"sampleSize" rows into account.
+        * 
+        * Note the input matrix should not be delta encoded; instead the delta is computed while processing, ensuring that we do not allocate
+        * more memory.
+        * 
+        * @param m          Input matrix that is not delta encoded and should 
not be modified
+        * @param transposed If the input matrix is transposed.
+        * @param rowCols    The list of columns to encode
+        * @param sampleSize The number of rows to consider for the delta 
encoding (from the beginning)
+        * @return A delta encoded encoding.
+        */
+       public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean 
transposed, int[] rowCols, int sampleSize) {
                throw new NotImplementedException();
        }
 
+       /**
+        * Create encoding of a single specific column inside the matrix input.
+        * 
+        * @param m          The Matrix to encode a column from
+        * @param transposed If the matrix is in transposed format.
+        * @param rowCol     The column index to encode
+        * @return An encoded format of the information of this column.
+        */
        public static IEncode createFromMatrixBlock(MatrixBlock m, boolean 
transposed, int rowCol) {
                if(m.isEmpty())
                        return new EmptyEncoding();
@@ -80,8 +110,10 @@ public interface IEncode {
        }
 
        private static IEncode createFromDenseTransposed(MatrixBlock m, int 
row) {
-               final DoubleCountHashMap map = new DoubleCountHashMap(16);
                final DenseBlock db = m.getDenseBlock();
+               if(!db.isContiguous())
+                       throw new NotImplementedException("Not Implemented non 
contiguous dense matrix encoding for sample");
+               final DoubleCountHashMap map = new DoubleCountHashMap(16);
                final int off = db.pos(row);
                final int nCol = m.getNumColumns();
                final int end = off + nCol;
@@ -148,12 +180,15 @@ public interface IEncode {
 
                final int nCol = m.getNumColumns();
                if(alen - apos > nCol / 4) { // return a dense encoding
-                       final AMapToData d = MapToFactory.create(nCol, nUnique 
+ 1);
+                       // If the row was full but the overall matrix is sparse.
+                       final int correct = (alen - apos == m.getNumColumns()) 
? 0 : 1;
+                       final AMapToData d = MapToFactory.create(nCol, nUnique 
+ correct);
                        // Since the dictionary is allocated with zero then we 
exploit that here and
                        // only iterate through non zero entries.
                        for(int i = apos; i < alen; i++)
-                               // plus one to assign unique IDs.
-                               d.set(aix[i], map.get(avals[i]) + 1);
+                               // add the correction offset to assign unique IDs, taking zero into account
+                               d.set(aix[i], map.get(avals[i]) + correct);
+                       // the rest is automatically set to zero.
 
                        return new DenseEncoding(d);
                }
@@ -167,15 +202,9 @@ public interface IEncode {
 
                        // Iteration 3 of non zero indexes, make a Offset 
Encoding to know what cells are zero and not.
                        // not done yet
-                       AOffset o = OffsetFactory.createOffset(aix, apos, alen);
+                       final AOffset o = OffsetFactory.createOffset(aix, apos, 
alen);
                        final int zero = m.getNumColumns() - o.getSize();
-                       try {
-                               return new SparseEncoding(d, o, zero, 
m.getNumColumns());
-                       }
-                       catch(Exception e) {
-                               throw new 
DMLCompressionException(Arrays.toString(aix), e);
-                       }
-
+                       return new SparseEncoding(d, o, zero, 
m.getNumColumns());
                }
        }
 
@@ -343,35 +372,4 @@ public interface IEncode {
 
                return new SparseEncoding(d, o, zeros, nRows);
        }
-
-       /**
-        * Combine two encodings, note it should be guaranteed by the caller 
that the number of unique multiplied does not
-        * overflow Integer.
-        * 
-        * @param e The other side to combine with
-        * @return The combined encoding
-        */
-       public IEncode combine(IEncode e);
-
-       public int getUnique();
-       
-       //  * @param cols           The cols involved
-       /**
-        * Extract the compression facts for this column group.
-        * 
-        * @param nRows          The total number of rows
-        * @param tupleSparsity  The Sparsity of the unique tuples
-        * @param matrixSparsity The matrix sparsity
-        * @param cs             The compression settings
-        * @return A EstimationFactors object
-        */
-       public EstimationFactors extractFacts(int nRows, double tupleSparsity, 
double matrixSparsity,
-               CompressionSettings cs);
-
-       /**
-        * Signify if the counts are including zero or without zero.
-        * 
-        * @return is dense
-        */
-       public abstract boolean isDense();
 }
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/IEncode.java 
b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/IEncode.java
index f84e188b03..b3daf3b5c3 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/IEncode.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/IEncode.java
@@ -19,26 +19,10 @@
 
 package org.apache.sysds.runtime.compress.estim.encoding;
 
-import java.util.Arrays;
-
-import org.apache.commons.lang.NotImplementedException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.DMLCompressionException;
-import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
-import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
-import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
-import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
 import org.apache.sysds.runtime.compress.estim.EstimationFactors;
-import org.apache.sysds.runtime.compress.readers.ReaderColumnSelection;
-import org.apache.sysds.runtime.compress.utils.DblArray;
-import org.apache.sysds.runtime.compress.utils.DblArrayCountHashMap;
-import org.apache.sysds.runtime.compress.utils.DoubleCountHashMap;
-import org.apache.sysds.runtime.compress.utils.IntArrayList;
-import org.apache.sysds.runtime.data.DenseBlock;
-import org.apache.sysds.runtime.data.SparseBlock;
-import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 
 /**
  * This interface covers an intermediate encoding for the samples to improve 
the efficiency of the joining of sample
@@ -47,303 +31,6 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 public interface IEncode {
        static final Log LOG = LogFactory.getLog(IEncode.class.getName());
 
-       public static IEncode createFromMatrixBlock(MatrixBlock m, boolean 
transposed, int[] rowCols) {
-               if(m.isEmpty())
-                       return new EmptyEncoding();
-               else if(rowCols.length == 1)
-                       return createFromMatrixBlock(m, transposed, rowCols[0]);
-               else
-                       return createWithReader(m, rowCols, transposed);
-       }
-
-       public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean 
transposed, int[] rowCols) {
-               return createFromMatrixBlockDelta(m, transposed, rowCols, 
transposed ? m.getNumColumns() : m.getNumRows());
-       }
-
-       public static IEncode createFromMatrixBlockDelta(MatrixBlock m, boolean 
transposed, int[] rowCols, int nVals) {
-               throw new NotImplementedException();
-       }
-
-       public static IEncode createFromMatrixBlock(MatrixBlock m, boolean 
transposed, int rowCol) {
-               if(m.isEmpty())
-                       return new EmptyEncoding();
-               else if(transposed) {
-                       if(m.isInSparseFormat())
-                               return createFromSparseTransposed(m, rowCol);
-                       else
-                               return createFromDenseTransposed(m, rowCol);
-               }
-               else if(m.isInSparseFormat())
-                       return createFromSparse(m, rowCol);
-               else
-                       return createFromDense(m, rowCol);
-       }
-
-       private static IEncode createFromDenseTransposed(MatrixBlock m, int 
row) {
-               final DoubleCountHashMap map = new DoubleCountHashMap(16);
-               final DenseBlock db = m.getDenseBlock();
-               final int off = db.pos(row);
-               final int nCol = m.getNumColumns();
-               final int end = off + nCol;
-               final double[] vals = db.values(row);
-
-               // Iteration 1, make Count HashMap.
-               for(int i = off; i < end; i++) // sequential access
-                       map.increment(vals[i]);
-
-               final int nUnique = map.size();
-
-               if(nUnique == 1)
-                       return new ConstEncoding(m.getNumColumns());
-
-               if(map.getOrDefault(0, -1) > nCol / 4) {
-                       map.replaceWithUIDsNoZero();
-                       final int zeroCount = map.get(0);
-                       final int nV = nCol - zeroCount;
-                       final IntArrayList offsets = new IntArrayList(nV);
-
-                       final AMapToData d = MapToFactory.create(nV, nUnique - 
1);
-
-                       // for(int i = off, r = 0, di = 0; i < end; i += nCol, 
r++){
-                       for(int i = off, r = 0, di = 0; i < end; i++, r++) {
-                               if(vals[i] != 0) {
-                                       offsets.appendValue(r);
-                                       d.set(di++, map.get(vals[i]));
-                               }
-                       }
-
-                       final AOffset o = OffsetFactory.createOffset(offsets);
-                       return new SparseEncoding(d, o, zeroCount, nCol);
-               }
-               else {
-                       map.replaceWithUIDs();
-                       // Create output map
-                       final AMapToData d = MapToFactory.create(nCol, nUnique);
-
-                       // Iteration 2, make final map
-                       for(int i = off, r = 0; i < end; i++, r++)
-                               d.set(r, map.get(vals[i]));
-
-                       return new DenseEncoding(d);
-               }
-       }
-
-       private static IEncode createFromSparseTransposed(MatrixBlock m, int 
row) {
-               final DoubleCountHashMap map = new DoubleCountHashMap(16);
-               final SparseBlock sb = m.getSparseBlock();
-               if(sb.isEmpty(row))
-                       return new EmptyEncoding();
-               final int apos = sb.pos(row);
-               final int alen = sb.size(row) + apos;
-               final double[] avals = sb.values(row);
-               final int[] aix = sb.indexes(row);
-
-               // Iteration 1 of non zero values, make Count HashMap.
-               for(int i = apos; i < alen; i++) // sequential of non zero 
cells.
-                       map.increment(avals[i]);
-
-               final int nUnique = map.size();
-
-               map.replaceWithUIDs();
-
-               final int nCol = m.getNumColumns();
-               if(alen - apos > nCol / 4) { // return a dense encoding
-                       final AMapToData d = MapToFactory.create(nCol, nUnique 
+ 1);
-                       // Since the dictionary is allocated with zero then we 
exploit that here and
-                       // only iterate through non zero entries.
-                       for(int i = apos; i < alen; i++)
-                               // plus one to assign unique IDs.
-                               d.set(aix[i], map.get(avals[i]) + 1);
-
-                       return new DenseEncoding(d);
-               }
-               else { // return a sparse encoding
-                       // Create output map
-                       final AMapToData d = MapToFactory.create(alen - apos, 
nUnique);
-
-                       // Iteration 2 of non zero values, make either a 
IEncode Dense or sparse map.
-                       for(int i = apos, j = 0; i < alen; i++, j++)
-                               d.set(j, map.get(avals[i]));
-
-                       // Iteration 3 of non zero indexes, make a Offset 
Encoding to know what cells are zero and not.
-                       // not done yet
-                       AOffset o = OffsetFactory.createOffset(aix, apos, alen);
-                       final int zero = m.getNumColumns() - o.getSize();
-                       try {
-                               return new SparseEncoding(d, o, zero, 
m.getNumColumns());
-                       }
-                       catch(Exception e) {
-                               throw new 
DMLCompressionException(Arrays.toString(aix), e);
-                       }
-
-               }
-       }
-
-       private static IEncode createFromDense(MatrixBlock m, int col) {
-               final DenseBlock db = m.getDenseBlock();
-               if(!db.isContiguous())
-                       throw new NotImplementedException("Not Implemented non 
contiguous dense matrix encoding for sample");
-               final DoubleCountHashMap map = new DoubleCountHashMap(16);
-               final int off = col;
-               final int nCol = m.getNumColumns();
-               final int nRow = m.getNumRows();
-               final int end = off + nRow * nCol;
-               final double[] vals = m.getDenseBlockValues();
-
-               // Iteration 1, make Count HashMap.
-               for(int i = off; i < end; i += nCol) // jump down through rows.
-                       map.increment(vals[i]);
-
-               final int nUnique = map.size();
-               if(nUnique == 1)
-                       return new ConstEncoding(m.getNumColumns());
-
-               if(map.getOrDefault(0, -1) > nRow / 4) {
-                       map.replaceWithUIDsNoZero();
-                       final int zeroCount = map.get(0);
-                       final int nV = m.getNumRows() - zeroCount;
-                       final IntArrayList offsets = new IntArrayList(nV);
-
-                       final AMapToData d = MapToFactory.create(nV, nUnique - 
1);
-
-                       for(int i = off, r = 0, di = 0; i < end; i += nCol, 
r++) {
-                               if(vals[i] != 0) {
-                                       offsets.appendValue(r);
-                                       d.set(di++, map.get(vals[i]));
-                               }
-                       }
-
-                       final AOffset o = OffsetFactory.createOffset(offsets);
-
-                       return new SparseEncoding(d, o, zeroCount, nRow);
-               }
-               else {
-                       // Allocate counts, and iterate once to replace counts 
with u ids
-                       map.replaceWithUIDs();
-                       final AMapToData d = MapToFactory.create(nRow, nUnique);
-                       // Iteration 2, make final map
-                       for(int i = off, r = 0; i < end; i += nCol, r++)
-                               d.set(r, map.get(vals[i]));
-                       return new DenseEncoding(d);
-               }
-       }
-
-       private static IEncode createFromSparse(MatrixBlock m, int col) {
-
-               final DoubleCountHashMap map = new DoubleCountHashMap(16);
-               final SparseBlock sb = m.getSparseBlock();
-
-               final double guessedNumberOfNonZero = Math.min(4, 
Math.ceil((double) m.getNumRows() * m.getSparsity()));
-               final IntArrayList offsets = new IntArrayList((int) 
guessedNumberOfNonZero);
-
-               // Iteration 1 of non zero values, make Count HashMap.
-               for(int r = 0; r < m.getNumRows(); r++) { // Horrible 
performance but ... it works.
-                       if(sb.isEmpty(r))
-                               continue;
-                       final int apos = sb.pos(r);
-                       final int alen = sb.size(r) + apos;
-                       final int[] aix = sb.indexes(r);
-                       final int index = Arrays.binarySearch(aix, apos, alen, 
col);
-                       if(index >= 0) {
-                               offsets.appendValue(r);
-                               map.increment(sb.values(r)[index]);
-                       }
-               }
-               if(offsets.size() == 0)
-                       return new EmptyEncoding();
-
-               final int nUnique = map.size();
-               map.replaceWithUIDs();
-
-               final AMapToData d = MapToFactory.create(offsets.size(), 
nUnique);
-
-               // Iteration 2 of non zero values, make either a IEncode Dense 
or sparse map.
-               for(int off = 0, r = 0; off < offsets.size(); r++) {
-                       if(sb.isEmpty(r))
-                               continue;
-                       final int apos = sb.pos(r);
-                       final int alen = sb.size(r) + apos;
-                       final int[] aix = sb.indexes(r);
-                       // Performance hit because of binary search for each 
row.
-                       final int index = Arrays.binarySearch(aix, apos, alen, 
col);
-                       if(index >= 0)
-                               d.set(off++, map.get(sb.values(r)[index]));
-               }
-
-               // Iteration 3 of non zero indexes, make a Offset Encoding to 
know what cells are zero and not.
-               AOffset o = OffsetFactory.createOffset(offsets);
-
-               final int zero = m.getNumRows() - offsets.size();
-               return new SparseEncoding(d, o, zero, m.getNumRows());
-       }
-
-       private static IEncode createWithReader(MatrixBlock m, int[] rowCols, 
boolean transposed) {
-               final ReaderColumnSelection reader1 = 
ReaderColumnSelection.createReader(m, rowCols, transposed);
-               final int nRows = transposed ? m.getNumColumns() : 
m.getNumRows();
-               final DblArrayCountHashMap map = new DblArrayCountHashMap(16, 
rowCols.length);
-               final IntArrayList offsets = new IntArrayList();
-               DblArray cellVals = reader1.nextRow();
-
-               // Iteration 1, make Count HashMap, and offsets.
-               while(cellVals != null) {
-                       map.increment(cellVals);
-                       offsets.appendValue(reader1.getCurrentRowIndex());
-                       cellVals = reader1.nextRow();
-               }
-
-               if(offsets.size() == 0)
-                       return new EmptyEncoding();
-               else if(map.size() == 1 && offsets.size() == nRows)
-                       return new ConstEncoding(nRows);
-
-               map.replaceWithUIDs();
-               if(offsets.size() < nRows / 4) {
-                       // Output encoded sparse since there is very empty.
-                       final int zeros = nRows - offsets.size();
-                       return createWithReaderSparse(m, map, zeros, rowCols, 
offsets, nRows, transposed);
-               }
-               else
-                       return createWithReaderDense(m, map, rowCols, nRows, 
transposed, offsets.size() < nRows);
-
-       }
-
-       private static IEncode createWithReaderDense(MatrixBlock m, 
DblArrayCountHashMap map, int[] rowCols, int nRows,
-               boolean transposed, boolean zero) {
-               // Iteration 2,
-               final int unique = map.size() + (zero ? 1 : 0);
-               final ReaderColumnSelection reader2 = 
ReaderColumnSelection.createReader(m, rowCols, transposed);
-               final AMapToData d = MapToFactory.create(nRows, unique);
-
-               DblArray cellVals;
-               if(zero)
-                       while((cellVals = reader2.nextRow()) != null)
-                               d.set(reader2.getCurrentRowIndex(), 
map.get(cellVals) + 1);
-               else
-                       while((cellVals = reader2.nextRow()) != null)
-                               d.set(reader2.getCurrentRowIndex(), 
map.get(cellVals));
-
-               return new DenseEncoding(d);
-       }
-
-       private static IEncode createWithReaderSparse(MatrixBlock m, 
DblArrayCountHashMap map, int zeros, int[] rowCols,
-               IntArrayList offsets, int nRows, boolean transposed) {
-               final ReaderColumnSelection reader2 = 
ReaderColumnSelection.createReader(m, rowCols, transposed);
-               DblArray cellVals = reader2.nextRow();
-
-               final AMapToData d = MapToFactory.create(offsets.size(), 
map.size());
-
-               int i = 0;
-               // Iterator 2 of non zero tuples.
-               while(cellVals != null) {
-                       d.set(i++, map.get(cellVals));
-                       cellVals = reader2.nextRow();
-               }
-
-               final AOffset o = OffsetFactory.createOffset(offsets);
-
-               return new SparseEncoding(d, o, zeros, nRows);
-       }
-
        /**
         * Combine two encodings, note it should be guaranteed by the caller 
that the number of unique multiplied does not
         * overflow Integer.
@@ -353,9 +40,13 @@ public interface IEncode {
         */
        public IEncode combine(IEncode e);
 
+       /**
+        * Get the number of unique values in this encoding
+        * 
+        * @return The number of unique values.
+        */
        public int getUnique();
-       
-       //  * @param cols           The cols involved
+
        /**
         * Extract the compression facts for this column group.
         * 
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/SparseEncoding.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/SparseEncoding.java
index a0cac41ad4..3d6c4180e6 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/SparseEncoding.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/encoding/SparseEncoding.java
@@ -20,7 +20,6 @@
 package org.apache.sysds.runtime.compress.estim.encoding;
 
 import org.apache.sysds.runtime.compress.CompressionSettings;
-import org.apache.sysds.runtime.compress.DMLCompressionException;
 import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
 import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
@@ -49,8 +48,6 @@ public class SparseEncoding implements IEncode {
                this.off = off;
                this.zeroCount = zeroCount;
                this.nRows = nRows;
-               if(off.getOffsetToLast() > nRows)
-                       throw new DMLCompressionException("Invalid Sparse 
Encoding because offsets are calculated incorrectly");
        }
 
        @Override
@@ -68,8 +65,6 @@ public class SparseEncoding implements IEncode {
        }
 
        protected IEncode combineSparse(SparseEncoding e) {
-               if(e.nRows != nRows)
-                       throw new DMLCompressionException("invalid number of 
rows");
                final int maxUnique = e.getUnique() * getUnique();
                final int[] d = new int[maxUnique - 1];
 
@@ -93,14 +88,9 @@ public class SparseEncoding implements IEncode {
                final int unique = combineSparse(map, e.map, itl, itr, retOff, 
tmpVals, fl, fr, nVl, nVr, d);
 
                if(retOff.size() < nRows / 4) {
-                       try {
-                               final AOffset o = 
OffsetFactory.createOffset(retOff);
-                               final AMapToData retMap = 
MapToFactory.create(tmpVals.size(), tmpVals.extractValues(), unique - 1);
-                               return new SparseEncoding(retMap, o, nRows - 
retOff.size(), nRows);
-                       }
-                       catch(Exception ex) {
-                               throw new DMLCompressionException("Failed 
combining sparse " + retOff + " " + this + "  " + e, ex);
-                       }
+                       final AOffset o = OffsetFactory.createOffset(retOff);
+                       final AMapToData retMap = 
MapToFactory.create(tmpVals.size(), tmpVals.extractValues(), unique - 1);
+                       return new SparseEncoding(retMap, o, nRows - 
retOff.size(), nRows);
                }
                else {
                        // there will always be a zero therefore unique is not 
subtracted one.
@@ -112,8 +102,9 @@ public class SparseEncoding implements IEncode {
                }
        }
 
-       private static int combineSparse(AMapToData lMap, AMapToData rMap, 
AIterator itl, AIterator itr, IntArrayList retOff,
-               IntArrayList tmpVals, int fl, int fr, int nVl, int nVr, int[] 
d) {
+       private static int combineSparse(AMapToData lMap, AMapToData rMap, 
AIterator itl, AIterator itr,
+               final IntArrayList retOff, final IntArrayList tmpVals, final 
int fl, final int fr, final int nVl, final int nVr,
+               final int[] d) {
 
                final int defR = (nVr - 1) * nVl;
                final int defL = nVl - 1;
@@ -122,17 +113,22 @@ public class SparseEncoding implements IEncode {
                int ir = itr.value();
 
                if(il == fl && ir == fr) { // easy both only have one value
-                       if(fl == fr) {// both on same row
-                               final int nv = 
lMap.getIndex(itl.getDataIndex()) + rMap.getIndex(itr.getDataIndex()) * nVl;
-                               return addVal(nv, il, d, newUID, tmpVals, 
retOff);
+                       tmpVals.appendValue(0);
+                       if(fl == fr) { // both on same row
+                               retOff.appendValue(fl);
+                               return 2;
                        }
-                       else if(fl < fr) {// fl is first
-                               newUID = 
addVal(lMap.getIndex(itl.getDataIndex()) + defR, il, d, newUID, tmpVals, 
retOff);
-                               return addVal(rMap.getIndex(itr.getDataIndex()) 
* nVl + defL, ir, d, newUID, tmpVals, retOff);
+                       // Known two locations to add.
+                       tmpVals.appendValue(1);
+                       if(fl < fr) {// fl is first
+                               retOff.appendValue(fl);
+                               retOff.appendValue(fr);
+                               return 3;
                        }
                        else {// fl is last
-                               newUID = 
addVal(rMap.getIndex(itr.getDataIndex()) * nVl + defL, ir, d, newUID, tmpVals, 
retOff);
-                               return addVal(lMap.getIndex(itl.getDataIndex()) 
+ defR, il, d, newUID, tmpVals, retOff);
+                               retOff.appendValue(fr);
+                               retOff.appendValue(fl);
+                               return 3;
                        }
                }
 
@@ -161,7 +157,8 @@ public class SparseEncoding implements IEncode {
        }
 
        private static int combineSparseTail(AMapToData lMap, AMapToData rMap, 
AIterator itl, AIterator itr,
-               IntArrayList retOff, IntArrayList tmpVals, int fl, int fr, int 
nVl, int nVr, int[] d, int newUID) {
+               final IntArrayList retOff, final IntArrayList tmpVals, final 
int fl, final int fr, final int nVl, final int nVr,
+               final int[] d, int newUID) {
                final int defR = (nVr - 1) * nVl;
                final int defL = nVl - 1;
                int il = itl.value();
@@ -191,6 +188,8 @@ public class SparseEncoding implements IEncode {
                                newUID = addVal(nv, il, d, newUID, tmpVals, 
retOff);
                                il = itl.next();
                        }
+                       final int nv = lMap.getIndex(itl.getDataIndex()) + defR;
+                       newUID = addVal(nv, il, d, newUID, tmpVals, retOff);
                }
                else if(ir < fr) {
                        while(ir < fl && ir < fr) {
@@ -198,6 +197,7 @@ public class SparseEncoding implements IEncode {
                                newUID = addVal(nv, ir, d, newUID, tmpVals, 
retOff);
                                ir = itr.next();
                        }
+
                        if(fr == fl) {
                                final int nv = 
lMap.getIndex(itl.getDataIndex()) + rMap.getIndex(itr.getDataIndex()) * nVl;
                                return addVal(nv, ir, d, newUID, tmpVals, 
retOff);
@@ -211,11 +211,14 @@ public class SparseEncoding implements IEncode {
                                final int nv = 
lMap.getIndex(itl.getDataIndex()) + defR;
                                newUID = addVal(nv, fl, d, newUID, tmpVals, 
retOff);
                        }
+
                        while(ir < fr) {
                                final int nv = 
rMap.getIndex(itr.getDataIndex()) * nVl + defL;
                                newUID = addVal(nv, ir, d, newUID, tmpVals, 
retOff);
                                ir = itr.next();
                        }
+                       final int nv = rMap.getIndex(itr.getDataIndex()) * nVl 
+ defL;
+                       newUID = addVal(nv, ir, d, newUID, tmpVals, retOff);
                }
 
                return newUID;
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/SampleEstimatorFactory.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/SampleEstimatorFactory.java
index ff2787eda9..39cb706e34 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/SampleEstimatorFactory.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/SampleEstimatorFactory.java
@@ -30,7 +30,8 @@ public interface SampleEstimatorFactory {
 
        public enum EstimationType {
                HassAndStokes, ShlosserEstimator, //
-               ShlosserJackknifeEstimator, SmoothedJackknifeEstimator
+               ShlosserJackknifeEstimator, SmoothedJackknifeEstimator,
+               HassAndStokesNoSolveCache,
        }
 
        /**
@@ -86,6 +87,8 @@ public interface SampleEstimatorFactory {
                                return 
ShlosserJackknifeEstimator.distinctCount(numVals, frequencies, invHist, nRows, 
sampleSize);
                        case SmoothedJackknifeEstimator:
                                return 
SmoothedJackknifeEstimator.distinctCount(numVals, invHist, nRows, sampleSize);
+                       case HassAndStokesNoSolveCache:
+                               return HassAndStokes.distinctCount(numVals, 
invHist, nRows, sampleSize, null);
                        case HassAndStokes:
                        default:
                                return HassAndStokes.distinctCount(numVals, 
invHist, nRows, sampleSize, solveCache);
diff --git 
a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserEstimator.java
 
b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserEstimator.java
index 7d456fcfd0..214d596394 100644
--- 
a/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserEstimator.java
+++ 
b/src/main/java/org/apache/sysds/runtime/compress/estim/sample/ShlosserEstimator.java
@@ -48,9 +48,6 @@ public interface ShlosserEstimator {
                        denomSum += (++i) * q * p1;
                }
 
-               if(denomSum == 0 || denomSum == Double.POSITIVE_INFINITY || 
denomSum == Double.NaN)
-                       return (int) numVals;
-
                return (int) Math.round(numVals + freqCounts[0] * numberSum / 
denomSum);
 
        }
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
 
b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
index a45e9e8f3c..eb99429740 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java
@@ -407,7 +407,7 @@ public abstract class CompressedTestBase extends TestBase {
                        ov, null, null});
 
                CompressionSettingsBuilder sb = 
csb().setCostType(CostType.W_TREE);
-               InstructionTypeCounter itc = new InstructionTypeCounter(10, 10, 
0, 100, 10, 0, 0, 10, 50, false);
+               InstructionTypeCounter itc = new InstructionTypeCounter(10, 10, 
0, 100, 10, 0, 0, 10,  false);
                CostEstimatorBuilder csb = new CostEstimatorBuilder(itc);
                SparsityType st = SparsityType.THIRTY;
                ValueType vt = ValueType.ONE_HOT;
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupTest.java
index 75f1f35ff8..51709eea5a 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/colgroup/ColGroupTest.java
@@ -1827,7 +1827,6 @@ public class ColGroupTest extends ColGroupBase {
                return getColGroup(mbt, ct, nRow);
        }
 
-
        protected static AColGroup getColGroup(MatrixBlock mbt, CompressionType 
ct, int nRow) {
                try {
 
@@ -2003,8 +2002,9 @@ public class ColGroupTest extends ColGroupBase {
 
        @Test
        public void getCost() {
-               final ComputationCostEstimator cheap = new 
ComputationCostEstimator(1, 1, 1, 1, 1, 1, 1, false);
-               final ComputationCostEstimator expensive = new 
ComputationCostEstimator(100, 100, 100, 100, 100, 100, 100, true);
+               final ComputationCostEstimator cheap = new 
ComputationCostEstimator(1, 1, 1, 1, 1, 1, 1, 1, false);
+               final ComputationCostEstimator expensive = new 
ComputationCostEstimator(100, 100, 100, 100, 100, 100, 100, 100,
+                       true);
                double cb = base.getCost(cheap, nRow);
                double eb = base.getCost(expensive, nRow);
                double co = other.getCost(cheap, nRow);
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/cost/ComputeCostTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/cost/ComputeCostTest.java
index 5ecf517396..1e8d3e99b5 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/cost/ComputeCostTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/cost/ComputeCostTest.java
@@ -56,34 +56,34 @@ public class ComputeCostTest extends ACostTest {
        private static List<ACostEstimate> getComputeCosts() {
                List<ACostEstimate> costEstimators = new ArrayList<>();
                // dictionary op that is densifying (plus)
-               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 0, 0, 
0, 1, true));
+               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 0, 0, 
0, 1, 0, true));
                // Left multiplication
-               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 1, 0, 
0, 0, false));
+               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 1, 0, 
0, 0, 0, false));
                // Left Multiplication but the matrix is densified
-               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 1, 0, 
0, 1, true));
+               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 1, 0, 
0, 1, 0, true));
                // 10 LMM densified
-               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 10, 0, 
0, 1, true));
+               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 10, 0, 
0, 1, 0, true));
 
                // Right Matrix Multiplication
-               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 0, 1, 
0, 0, false));
-               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 0, 1, 
0, 1, true));
+               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 0, 1, 
0, 0, 0, false));
+               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 0, 1, 
0, 1, 0, true));
 
                // Decompression
-               costEstimators.add(new ComputationCostEstimator(0, 1, 0, 0, 0, 
0, 0, false));
+               costEstimators.add(new ComputationCostEstimator(0, 1, 0, 0, 0, 
0, 0, 0, false));
 
                // decompressing after densifying
-               costEstimators.add(new ComputationCostEstimator(0, 1, 0, 0, 0, 
0, 1, true));
+               costEstimators.add(new ComputationCostEstimator(0, 1, 0, 0, 0, 
0, 1, 0, true));
 
                // One Scan (this is the type that is used if we
                // require a process through the index structure) such as in 
rowSum.
-               costEstimators.add(new ComputationCostEstimator(1, 0, 0, 0, 0, 
0, 0, false));
-               costEstimators.add(new ComputationCostEstimator(1, 0, 0, 0, 0, 
0, 1, true));
+               costEstimators.add(new ComputationCostEstimator(1, 0, 0, 0, 0, 
0, 0, 0, false));
+               costEstimators.add(new ComputationCostEstimator(1, 0, 0, 0, 0, 
0, 1, 0, true));
 
                // Overlapping decompression
-               costEstimators.add(new ComputationCostEstimator(0, 0, 1, 0, 0, 
0, 0, false));
+               costEstimators.add(new ComputationCostEstimator(0, 0, 1, 0, 0, 
0, 0, 0, false));
 
                // Compressed Multiplication
-               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 0, 0, 
1, 0, false));
+               costEstimators.add(new ComputationCostEstimator(0, 0, 0, 0, 0, 
1, 0, 0, false));
 
                return costEstimators;
        }
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/cost/InstructionCounterTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/cost/InstructionCounterTest.java
new file mode 100644
index 0000000000..957284d27a
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/cost/InstructionCounterTest.java
@@ -0,0 +1,367 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.compress.cost;
+
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertFalse;
+import static org.junit.Assert.assertTrue;
+
+import org.apache.sysds.runtime.compress.cost.InstructionTypeCounter;
+import org.junit.Test;
+
+public class InstructionCounterTest {
+
+       @Test
+       public void testEmpty() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.toString();
+               assertEquals(0, c.getScans());
+       }
+
+       @Test
+       public void testScans() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incScans();
+               c.toString();
+               assertEquals(2, c.getScans());
+       }
+
+       @Test
+       public void testScans_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans(3);
+               c.toString();
+               assertEquals(3, c.getScans());
+       }
+
+       @Test
+       public void testDecompressions() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+       }
+
+       @Test
+       public void testDecompressions_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions(4);
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(4, c.getDecompressions());
+       }
+
+       @Test
+       public void testOverlappingDecompressions() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+       }
+
+       @Test
+       public void testOverlappingDecompressions_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions(42);
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(42, c.getOverlappingDecompressions());
+       }
+
+       @Test
+       public void testLeftMultiplications() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM();
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(1, c.getLeftMultiplications());
+
+       }
+
+       @Test
+       public void testLeftMultiplications_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM(30);
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(30, c.getLeftMultiplications());
+       }
+
+       @Test
+       public void testRightMultiplications() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM(30);
+               c.incRMM();
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(30, c.getLeftMultiplications());
+               assertEquals(1, c.getRightMultiplications());
+       }
+
+       @Test
+       public void testRightMultiplications_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM(30);
+               c.incRMM(23);
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(30, c.getLeftMultiplications());
+               assertEquals(23, c.getRightMultiplications());
+       }
+
+       @Test
+       public void testCMM() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM(30);
+               c.incRMM(23);
+               c.incCMM();
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(30, c.getLeftMultiplications());
+               assertEquals(23, c.getRightMultiplications());
+               assertEquals(1, c.getCompressedMultiplications());
+       }
+
+       @Test
+       public void testCMM_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM(30);
+               c.incRMM(23);
+               c.incCMM(42);
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(30, c.getLeftMultiplications());
+               assertEquals(23, c.getRightMultiplications());
+               assertEquals(42, c.getCompressedMultiplications());
+       }
+
+       @Test
+       public void testDictOps() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM(30);
+               c.incRMM(23);
+               c.incCMM();
+               c.incDictOps();
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(30, c.getLeftMultiplications());
+               assertEquals(23, c.getRightMultiplications());
+               assertEquals(1, c.getCompressedMultiplications());
+               assertEquals(1, c.getDictionaryOps());
+       }
+
+       @Test
+       public void testDictOps_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM(30);
+               c.incRMM(23);
+               c.incCMM();
+               c.incDictOps(222);
+               c.toString();
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(30, c.getLeftMultiplications());
+               assertEquals(23, c.getRightMultiplications());
+               assertEquals(1, c.getCompressedMultiplications());
+               assertEquals(222, c.getDictionaryOps());
+       }
+
+       @Test
+       public void testIndexing() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM(30);
+               c.incRMM(23);
+               c.incCMM();
+               c.incDictOps();
+               c.incIndexOp();
+               c.toString();
+
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(30, c.getLeftMultiplications());
+               assertEquals(23, c.getRightMultiplications());
+               assertEquals(1, c.getCompressedMultiplications());
+               assertEquals(1, c.getDictionaryOps());
+               assertEquals(1, c.getIndexing());
+       }
+
+       @Test
+       public void testIndexing_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incLMM(30);
+               c.incRMM(23);
+               c.incCMM();
+               c.incDictOps();
+               c.incIndexOp(425);
+               c.toString();
+
+               assertEquals(1, c.getScans());
+               assertEquals(1, c.getDecompressions());
+               assertEquals(1, c.getOverlappingDecompressions());
+               assertEquals(30, c.getLeftMultiplications());
+               assertEquals(23, c.getRightMultiplications());
+               assertEquals(1, c.getCompressedMultiplications());
+               assertEquals(1, c.getDictionaryOps());
+               assertEquals(425, c.getIndexing());
+       }
+
+       @Test
+       public void testDensifying() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.toString();
+
+               assertEquals(1, c.getScans());
+               assertFalse(c.isDensifying());
+       }
+
+       @Test
+       public void testDensifying_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.setDensifying(true);
+               c.toString();
+
+               assertEquals(1, c.getScans());
+               assertTrue(c.isDensifying());
+       }
+
+       @Test
+       public void testVarious() {
+               InstructionTypeCounter c = new InstructionTypeCounter();
+               c.incScans();
+               c.incOverlappingDecompressions();
+               c.incOverlappingDecompressions();
+               c.incDecompressions();
+               c.incDecompressions();
+               c.incLMM(30);
+               c.incDecompressions();
+               c.incOverlappingDecompressions();
+               c.incRMM(23);
+               c.incCMM();
+               c.incDictOps();
+               c.incRMM(22);
+               c.incDictOps();
+               c.incScans();
+               c.incLMM(32);
+               c.incCMM();
+               c.incOverlappingDecompressions();
+               c.incDictOps();
+               c.incIndexOp();
+
+               c.toString();
+
+               assertEquals(2, c.getScans());
+               assertEquals(3, c.getDecompressions());
+               assertEquals(4, c.getOverlappingDecompressions());
+               assertEquals(62, c.getLeftMultiplications());
+               assertEquals(45, c.getRightMultiplications());
+               assertEquals(2, c.getCompressedMultiplications());
+               assertEquals(3, c.getDictionaryOps());
+               assertEquals(1, c.getIndexing());
+       }
+
+       @Test
+       public void testDirectConstructor() {
+
+               InstructionTypeCounter c = new InstructionTypeCounter(2, 3, 4, 
62, 45, 2, 3, 1, false);
+
+               assertEquals(2, c.getScans());
+               assertEquals(3, c.getDecompressions());
+               assertEquals(4, c.getOverlappingDecompressions());
+               assertEquals(62, c.getLeftMultiplications());
+               assertEquals(45, c.getRightMultiplications());
+               assertEquals(2, c.getCompressedMultiplications());
+               assertEquals(3, c.getDictionaryOps());
+               assertEquals(1, c.getIndexing());
+               assertFalse(c.isDensifying());
+       }
+
+       @Test
+       public void testDirectConstructor_2() {
+               InstructionTypeCounter c = new InstructionTypeCounter(2, 3, 4, 
62, 45, 2, 3, 1, true);
+               assertEquals(2, c.getScans());
+               assertEquals(3, c.getDecompressions());
+               assertEquals(4, c.getOverlappingDecompressions());
+               assertEquals(62, c.getLeftMultiplications());
+               assertEquals(45, c.getRightMultiplications());
+               assertEquals(2, c.getCompressedMultiplications());
+               assertEquals(3, c.getDictionaryOps());
+               assertEquals(1, c.getIndexing());
+               assertTrue(c.isDensifying());
+       }
+}
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/SampleEstimatorTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/SampleEstimatorTest.java
index 96b2c34504..283ff8d5ad 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/estim/SampleEstimatorTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/SampleEstimatorTest.java
@@ -29,7 +29,6 @@ import org.apache.sysds.runtime.compress.estim.AComEst;
 import org.apache.sysds.runtime.compress.estim.ComEstFactory;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.test.TestUtils;
-import org.junit.Ignore;
 import org.junit.Test;
 
 public class SampleEstimatorTest {
@@ -51,43 +50,37 @@ public class SampleEstimatorTest {
        }
 
        @Test
-       @Ignore
        public void compressedSizeInfoEstimatorSample_90() {
-               
testSampleEstimateIsAtMaxEstimatedElementsInEachColumnsProduct(0.9, 0.9);
+               // Overrule to exact when over 80%
+               
testSampleEstimateIsAtMaxEstimatedElementsInEachColumnsProduct(0.9, 1.0); 
        }
 
        @Test
-       @Ignore
        public void compressedSizeInfoEstimatorSample_50() {
                
testSampleEstimateIsAtMaxEstimatedElementsInEachColumnsProduct(0.5, 0.90);
        }
 
        @Test
-       @Ignore
        public void compressedSizeInfoEstimatorSample_20() {
                
testSampleEstimateIsAtMaxEstimatedElementsInEachColumnsProduct(0.2, 0.8);
        }
 
        @Test
-       @Ignore
        public void compressedSizeInfoEstimatorSample_10() {
                
testSampleEstimateIsAtMaxEstimatedElementsInEachColumnsProduct(0.1, 0.75);
        }
 
        @Test
-       @Ignore
        public void compressedSizeInfoEstimatorSample_5() {
                
testSampleEstimateIsAtMaxEstimatedElementsInEachColumnsProduct(0.05, 0.7);
        }
 
        @Test
-       @Ignore
        public void compressedSizeInfoEstimatorSample_1() {
                
testSampleEstimateIsAtMaxEstimatedElementsInEachColumnsProduct(0.01, 0.6);
        }
 
        @Test
-       @Ignore
        public void compressedSizeInfoEstimatorSample_p1() {
                
testSampleEstimateIsAtMaxEstimatedElementsInEachColumnsProduct(0.001, 0.5);
        }
@@ -109,7 +102,7 @@ public class SampleEstimatorTest {
 
                cs_estimate.transposed = true;
 
-               final AComEst estimate = ComEstFactory.createEstimator(mbt, 
cs_estimate, 1);
+               final AComEst estimate = ComEstFactory.createEstimator(mbt, 
cs_estimate, (int)(ratio * mbt.getNumColumns()) ,1);
                final int estimate_1 = estimate.getColGroupInfo(new int[] 
{0}).getNumVals() + 1;
                final int estimate_2 = estimate.getColGroupInfo(new int[] 
{1}).getNumVals() + 1;
 
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java
new file mode 100644
index 0000000000..0edf02bf6e
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeNegativeTest.java
@@ -0,0 +1,99 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.compress.estim.encoding;
+
+import org.apache.commons.lang.NotImplementedException;
+import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
+import org.apache.sysds.runtime.data.DenseBlockFP64;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.junit.Test;
+
+public class EncodeNegativeTest {
+
+       final MatrixBlock mock;
+
+       public EncodeNegativeTest() {
+               mock = new MatrixBlock(3, 3, new DenseBlockFP64Mock(new int[] 
{3, 3}, new double[] {1, 2, 3, 4, 5, 6, 7, 8, 9}));
+               mock.setNonZeros(9);
+       }
+
+       @Test(expected = NotImplementedException.class)
+       public void encodeNonContiguous() {
+               EncodingFactory.createFromMatrixBlock(mock, false, 3);
+       }
+
+       @Test(expected = NotImplementedException.class)
+       public void encodeNonContiguousTransposed() {
+               EncodingFactory.createFromMatrixBlock(mock, true, 3);
+       }
+
+       @Test(expected = NullPointerException.class)
+       public void testInvalidToCallWithNullDeltaTransposed() {
+               EncodingFactory.createFromMatrixBlockDelta(null, true, null);
+       }
+
+       @Test(expected = NullPointerException.class)
+       public void testInvalidToCallWithNullDelta() {
+               EncodingFactory.createFromMatrixBlockDelta(null, false, null);
+       }
+
+       @Test(expected = NullPointerException.class)
+       public void testInvalidToCallWithNull() {
+               EncodingFactory.createFromMatrixBlock(null, false, null);
+       }
+
+       @Test(expected = NotImplementedException.class)
+       public void testDeltaTransposed() {
+               EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 
10, false), true, null);
+       }
+
+       @Test(expected = NotImplementedException.class)
+       public void testDelta() {
+               EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 
10, false), false, null);
+       }
+
+       @Test(expected = NotImplementedException.class)
+       public void testDeltaTransposedNVals() {
+               EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 
10, false), true, null, 2);
+       }
+
+       @Test(expected = NotImplementedException.class)
+       public void testDeltaNVals() {
+               EncodingFactory.createFromMatrixBlockDelta(new MatrixBlock(10, 
10, false), false, null, 1);
+       }
+
+       private class DenseBlockFP64Mock extends DenseBlockFP64 {
+               private static final long serialVersionUID = 
-3601232958390554672L;
+
+               public DenseBlockFP64Mock(int[] dims, double[] data) {
+                       super(dims, data);
+               }
+
+               @Override
+               public boolean isContiguous() {
+                       return false;
+               }
+
+               @Override
+               public int numBlocks() {
+                       return 2;
+               }
+       }
+}
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleCustom.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleCustom.java
index cbf5a331be..1e02d5ff57 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleCustom.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleCustom.java
@@ -28,14 +28,11 @@ import java.util.Arrays;
 import java.util.Scanner;
 import java.util.regex.Pattern;
 
-import org.apache.commons.lang.NotImplementedException;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.compress.colgroup.mapping.AMapToData;
 import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.DenseEncoding;
-import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
-import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.junit.Test;
 
 import scala.NotImplementedError;
@@ -50,8 +47,6 @@ public class EncodeSampleCustom {
                int[] d2 = 
readData("src/test/resources/component/compress/sample/s2.dat");
                int m1 = Arrays.stream(d1).max().getAsInt() + 1;
                int m2 = Arrays.stream(d2).max().getAsInt() + 1;
-               // LOG.error(m1 + " " + m2 + " " + (m1 * m2));
-
                AMapToData dm1 = MapToFactory.create(d1.length, d1, m1);
                AMapToData dm2 = MapToFactory.create(d2.length, d2, m2);
 
@@ -89,25 +84,4 @@ public class EncodeSampleCustom {
                        throw new NotImplementedError();
                }
        }
-
-       @Test(expected = NullPointerException.class)
-       public void testInvalidToCallWithNullDelta() {
-               IEncode.createFromMatrixBlockDelta(null, true, null);
-       }
-
-       @Test(expected = NullPointerException.class)
-       public void testInvalidToCallWithNull() {
-               IEncode.createFromMatrixBlock(null, true, null);
-       }
-
-       @Test(expected = NotImplementedException.class)
-       public void testDeltaTransposed() {
-               IEncode.createFromMatrixBlockDelta(new MatrixBlock(10, 10, 
false), true, null);
-       }
-
-       @Test(expected = NotImplementedException.class)
-       public void testDelta() {
-               IEncode.createFromMatrixBlockDelta(new MatrixBlock(10, 10, 
false), false, null);
-       }
-
 }
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleSingleColTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleSingleColTest.java
index d629b023d6..1e11221f1b 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleSingleColTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleSingleColTest.java
@@ -26,6 +26,7 @@ import java.util.Collection;
 
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.test.TestUtils;
@@ -103,14 +104,22 @@ public class EncodeSampleSingleColTest extends 
EncodeSampleTest {
        }
 
        public static Object[] create(int nRow, int nCol, double sparsity, 
boolean transposed, int nUnique, int seed) {
+               return create(nRow, nCol, sparsity, transposed, nUnique, seed, 
false);
+       }
+
+       public static Object[] create(int nRow, int nCol, double sparsity, 
boolean transposed, int nUnique, int seed, boolean forceSparse) {
                try {
                        int u = nUnique;
                        // Make sure that nUnique always is correct if we have 
a large enough matrix.
                        MatrixBlock m = 
TestUtils.round(TestUtils.generateTestMatrixBlock(nRow, nCol, 0.5, nUnique, 
sparsity, seed));
+
+                       if(forceSparse)
+                               m.denseToSparse(true);
+
                        u += sparsity < 1.0 && sparsity != 0 ? 1 : 0;
                        boolean t = transposed;
 
-                       IEncode e = IEncode.createFromMatrixBlock(m, t, 0);
+                       IEncode e = EncodingFactory.createFromMatrixBlock(m, t, 
0);
                        return new Object[] {m, t, u, e};
                }
                catch(Exception e) {
@@ -129,7 +138,7 @@ public class EncodeSampleSingleColTest extends 
EncodeSampleTest {
 
                        boolean t = transposed;
 
-                       IEncode e = IEncode.createFromMatrixBlock(m, t, 0);
+                       IEncode e = EncodingFactory.createFromMatrixBlock(m, t, 
0);
                        return new Object[] {m, t, u, e};
                }
                catch(Exception e) {
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleTest.java
index bf5620da8a..81d0ace0ef 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleTest.java
@@ -25,6 +25,9 @@ import static org.junit.Assert.fail;
 import org.apache.commons.logging.Log;
 import org.apache.commons.logging.LogFactory;
 import org.apache.sysds.runtime.compress.CompressionSettingsBuilder;
+import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType;
+import org.apache.sysds.runtime.compress.estim.EstimationFactors;
+import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.junit.Test;
@@ -93,7 +96,7 @@ public abstract class EncodeSampleTest {
        public void testCombineEmptyLeft() {
                try {
                        final MatrixBlock empty = new 
MatrixBlock(m.getNumRows(), m.getNumColumns(), true);
-                       final IEncode emptyEncoding = 
IEncode.createFromMatrixBlock(empty, t, 0);
+                       final IEncode emptyEncoding = 
EncodingFactory.createFromMatrixBlock(empty, t, 0);
                        assertEquals(u, emptyEncoding.combine(e).getUnique());
                }
                catch(Exception e) {
@@ -106,7 +109,7 @@ public abstract class EncodeSampleTest {
        public void testCombineEmptyRight() {
                try {
                        final MatrixBlock empty = new 
MatrixBlock(m.getNumRows(), m.getNumColumns(), true);
-                       final IEncode emptyEncoding = 
IEncode.createFromMatrixBlock(empty, t, 0);
+                       final IEncode emptyEncoding = 
EncodingFactory.createFromMatrixBlock(empty, t, 0);
                        assertEquals(u, e.combine(emptyEncoding).getUnique());
                }
                catch(Exception e) {
@@ -119,7 +122,7 @@ public abstract class EncodeSampleTest {
        public void testCombineConstLeft() {
                try {
                        final MatrixBlock c = new MatrixBlock(m.getNumRows(), 
m.getNumColumns(), 1.0);
-                       final IEncode emptyEncoding = 
IEncode.createFromMatrixBlock(c, t, 0);
+                       final IEncode emptyEncoding = 
EncodingFactory.createFromMatrixBlock(c, t, 0);
                        assertEquals(u, emptyEncoding.combine(e).getUnique());
                }
                catch(Exception e) {
@@ -132,7 +135,7 @@ public abstract class EncodeSampleTest {
        public void testCombineConstRight() {
                try {
                        final MatrixBlock c = new MatrixBlock(m.getNumRows(), 
m.getNumColumns(), 1.0);
-                       final IEncode emptyEncoding = 
IEncode.createFromMatrixBlock(c, t, 0);
+                       final IEncode emptyEncoding = 
EncodingFactory.createFromMatrixBlock(c, t, 0);
                        final IEncode comp = e.combine(emptyEncoding);
                        assertEquals(u, comp.getUnique());
                }
@@ -146,11 +149,41 @@ public abstract class EncodeSampleTest {
        public void toEstimationFactors() {
                try {
                        int rows = t ? m.getNumColumns() : m.getNumRows();
-                       e.extractFacts(rows, 1.0, 1.0, new 
CompressionSettingsBuilder().create());
+                       EstimationFactors a = e.extractFacts(rows, 1.0, 1.0, 
new CompressionSettingsBuilder().create());
+                       int[] f = a.getFrequencies();
+                       if(f != null)
+                               for(int i : f)
+                                       if(i <= 0)
+                                               fail("Frequencies contains 
zero");
                }
                catch(Exception e) {
                        e.printStackTrace();
                        fail(e.getMessage());
                }
        }
+
+       @Test
+       public void toEstimationFactorsWithRLE() {
+               try {
+                       int rows = t ? m.getNumColumns() : m.getNumRows();
+                       EstimationFactors a = e.extractFacts(rows, 1.0, 1.0, 
new 
CompressionSettingsBuilder().addValidCompression(CompressionType.RLE).create());
+                       int[] f = a.getFrequencies();
+                       if(f != null)
+                               for(int i : f)
+                                       if(i <= 0)
+                                               fail("Frequencies contains 
zero");
+               }
+               catch(Exception e) {
+                       e.printStackTrace();
+                       fail(e.getMessage());
+               }
+       }
+
+       @Test
+       public void isDense(){
+               boolean d = e.isDense();
+               int rows = t ? m.getNumColumns() : m.getNumRows();
+               if(rows == 1 && m.isInSparseFormat() && ! d)
+                       fail ("Should extract sparse if input is sparse and one 
column (row)");
+       }
 }
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleUnbalancedTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleUnbalancedTest.java
index 62bc26c7f0..6f45882c7b 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleUnbalancedTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleUnbalancedTest.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
 import java.util.ArrayList;
 import java.util.Collection;
 
+import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.test.TestUtils;
@@ -49,6 +50,27 @@ public class EncodeSampleUnbalancedTest extends 
EncodeSampleMultiColTest {
                tests.add(createT(1, .1, 2, 1, 1.0, 2, 100, 32141));
                tests.add(createT(1, 1.0, 2, 1, 0.1, 2, 100, 777));
 
+               tests.add(createT(1, .1, 1, 1, 1.0, 1, 100, 32141));
+               tests.add(createT(1, 1.0, 1, 1, 0.1, 1, 100, 777));
+
+               tests.add(createT(1, .4, 1, 1, .4, 1, 100, 32141));
+               tests.add(createT(1, .4, 1, 1, .4, 1, 100, 777));
+
+               tests.add(createT(1, .4, 2, 1, .4, 2, 100, 32141));
+               tests.add(createT(1, .4, 2, 1, .4, 2, 100, 777));
+
+               tests.add(createT(1, .4, 3, 1, .4, 3, 100, 32141));
+               tests.add(createT(1, .4, 3, 1, .4, 3, 100, 777));
+
+               tests.add(createTSparse(1, .5, 3, 1, .5, 3, 100, 32141, true, 
true));
+               tests.add(createTSparse(1, .5, 3, 1, .5, 3, 100, 777, true, 
true));
+               tests.add(createTSparse(1, .2, 3, 1, 1.0, 3, 100, 3377, true, 
true));
+
+               for(int i = 0; i < 10; i++) {
+
+                       tests.add(createTSparse(1, .01, 2, 1, .01, 2, 100, i * 
231, true, true));
+               }
+
                // big sparse
                tests.add(createT(1, 0.0001, 10, 1, 0.0000001, 2, 10000000, 
1231));
                // more rows
@@ -60,12 +82,17 @@ public class EncodeSampleUnbalancedTest extends 
EncodeSampleMultiColTest {
                return tests;
        }
 
+       private static Object[] createTSparse(int nRow1, double sp1, int nU1, 
int nRow2, double sp2, int nU2, int nCol,
+               int seed, boolean forceSparse, boolean forceSparse2) {
+               return create(nRow1, nCol, sp1, nU1, nRow2, nCol, sp2, nU2, 
seed, true, forceSparse, forceSparse2);
+       }
+
        private static Object[] createT(int nRow1, double sp1, int nU1, int 
nRow2, double sp2, int nU2, int nCol, int seed) {
-               return create(nRow1, nCol, sp1, nU1, nRow2, nCol, sp2, nU2, 
seed, true);
+               return create(nRow1, nCol, sp1, nU1, nRow2, nCol, sp2, nU2, 
seed, true, false, false);
        }
 
        private static Object[] create(int nRow1, int nCol1, double sp1, int 
nU1, int nRow2, int nCol2, double sp2, int nU2,
-               int seed, boolean t) {
+               int seed, boolean t, boolean forceSparse, boolean forceSparse2) 
{
                try {
                        // Make sure that nUnique always is correct if we have 
a large enough matrix.
                        nU1 -= sp1 < 1.0 ? 1 : 0;
@@ -75,6 +102,12 @@ public class EncodeSampleUnbalancedTest extends 
EncodeSampleMultiColTest {
                        final int min2 = sp2 < 1.0 ? 0 : 1;
                        MatrixBlock m2 = TestUtils
                                .round(TestUtils.generateTestMatrixBlock(nRow2, 
nCol2, min2, nU2, sp2, seed * 21351));
+
+                       if(forceSparse)
+                               m1.denseToSparse(true);
+                       if(forceSparse2)
+                               m2.denseToSparse(true);
+
                        return create(m1, m2, t);
                }
                catch(Exception e) {
@@ -87,18 +120,17 @@ public class EncodeSampleUnbalancedTest extends 
EncodeSampleMultiColTest {
        protected static Object[] create(MatrixBlock m1, MatrixBlock m2, 
boolean t) {
 
                MatrixBlock m = m1.append(m2, null, !t);
-               return create(m,m1,m2,t);
+               return create(m, m1, m2, t);
        }
 
-
        protected static Object[] create(MatrixBlock m, MatrixBlock m1, 
MatrixBlock m2, boolean t) {
                try {
 
-                       final IEncode e = IEncode.createFromMatrixBlock(m, t, 
genRowCol(t ? m.getNumRows() : m.getNumColumns()));
+                       final IEncode e = 
EncodingFactory.createFromMatrixBlock(m, t, genRowCol(t ? m.getNumRows() : 
m.getNumColumns()));
 
                        // sub part.
-                       final IEncode fh = IEncode.createFromMatrixBlock(m1, t, 
genRowCol(t ? m1.getNumRows() : m1.getNumColumns()));
-                       final IEncode sh = IEncode.createFromMatrixBlock(m2, t, 
genRowCol(t ? m2.getNumRows() : m2.getNumColumns()));
+                       final IEncode fh = 
EncodingFactory.createFromMatrixBlock(m1, t, genRowCol(t ? m1.getNumRows() : 
m1.getNumColumns()));
+                       final IEncode sh = 
EncodingFactory.createFromMatrixBlock(m2, t, genRowCol(t ? m2.getNumRows() : 
m2.getNumColumns()));
 
                        // join subparts and use its unique count for tests
                        final IEncode er = fh.combine(sh);
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleUniformTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleUniformTest.java
index 9a1d64a08a..534d0d8e23 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleUniformTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/encoding/EncodeSampleUniformTest.java
@@ -24,6 +24,7 @@ import static org.junit.Assert.fail;
 import java.util.ArrayList;
 import java.util.Collection;
 
+import org.apache.sysds.runtime.compress.estim.encoding.EncodingFactory;
 import org.apache.sysds.runtime.compress.estim.encoding.IEncode;
 import org.apache.sysds.runtime.matrix.data.MatrixBlock;
 import org.apache.sysds.test.TestUtils;
@@ -98,12 +99,12 @@ public class EncodeSampleUniformTest extends 
EncodeSampleMultiColTest {
                        // Make sure that nUnique always is correct if we have 
a large enough matrix.
 
                        final int d = t ? m.getNumRows() : m.getNumColumns();
-                       final IEncode e = IEncode.createFromMatrixBlock(m, t, 
genRowCol(d));
+                       final IEncode e = 
EncodingFactory.createFromMatrixBlock(m, t, genRowCol(d));
 
                        // split and read subparts individually
                        final int dfh = d / 2;
-                       final IEncode fh = IEncode.createFromMatrixBlock(m, t, 
genRowCol(dfh));
-                       final IEncode sh = IEncode.createFromMatrixBlock(m, t, 
genRowCol(dfh, d));
+                       final IEncode fh = 
EncodingFactory.createFromMatrixBlock(m, t, genRowCol(dfh));
+                       final IEncode sh = 
EncodingFactory.createFromMatrixBlock(m, t, genRowCol(dfh, d));
 
                        // join subparts and use its unique count for tests
                        final IEncode er = fh.combine(sh);
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/sample/SampleDistinctNegativeTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/sample/SampleDistinctNegativeTest.java
new file mode 100644
index 0000000000..6b253ff9bc
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/sample/SampleDistinctNegativeTest.java
@@ -0,0 +1,63 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.compress.estim.sample;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+import org.apache.sysds.runtime.compress.estim.sample.SampleEstimatorFactory;
+import 
org.apache.sysds.runtime.compress.estim.sample.SampleEstimatorFactory.EstimationType;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(value = Parameterized.class)
+public class SampleDistinctNegativeTest {
+
+       private final int[] frequencies;
+
+       public SampleDistinctNegativeTest(int[] frequencies) {
+               this.frequencies = frequencies;
+       }
+
+       @Parameters
+       public static Collection<Object[]> data() {
+               ArrayList<Object[]> tests = new ArrayList<>();
+
+               tests.add(new Object[] {new int[] {-1}});
+               tests.add(new Object[] {new int[] {-10}});
+
+               tests.add(new Object[] {new int[] {-1022, 4, 2, 1, 3, -32}});
+               tests.add(new Object[] {new int[] {10, 9, 8, 7, 6, 4, 4, 3, 2, 
1, 0, -1}});
+               
+               // 0 is also invalid input of the frequency counts.
+               // It is impossible to count 0 occurrences of anything.
+               tests.add(new Object[] {new int[] {0}});
+
+               return tests;
+       }
+
+       @Test(expected = ArrayIndexOutOfBoundsException.class)
+       public void testDistinctCountThrowsOnInvalidFrequencies() {
+               SampleEstimatorFactory.distinctCount(frequencies, 100, 2, 
EstimationType.HassAndStokes);
+       }
+
+}
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/SampleDistinctTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/sample/SampleDistinctTest.java
similarity index 99%
rename from 
src/test/java/org/apache/sysds/test/component/compress/estim/SampleDistinctTest.java
rename to 
src/test/java/org/apache/sysds/test/component/compress/estim/sample/SampleDistinctTest.java
index fe12e886c1..3ecc4ca894 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/estim/SampleDistinctTest.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/sample/SampleDistinctTest.java
@@ -17,7 +17,7 @@
  * under the License.
  */
 
-package org.apache.sysds.test.component.compress.estim;
+package org.apache.sysds.test.component.compress.estim.sample;
 
 import static org.junit.Assert.assertEquals;
 import static org.junit.Assert.fail;
@@ -59,6 +59,7 @@ public class SampleDistinctTest {
                HashMap<Integer, Double> solveCache = new HashMap<>();
 
                for(EstimationType type : EstimationType.values()) {
+                       
                        tests.add(new Object[] {null, type, solveCache});
                        tests.add(new Object[] {new int[] {}, type, 
solveCache});
                        tests.add(new Object[] {new int[] {97, 6, 56, 4, 242, 
123, 2}, type, solveCache});
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/estim/sample/ShlosserEstimatorTest.java
 
b/src/test/java/org/apache/sysds/test/component/compress/estim/sample/ShlosserEstimatorTest.java
new file mode 100644
index 0000000000..7e30c5d9fa
--- /dev/null
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/estim/sample/ShlosserEstimatorTest.java
@@ -0,0 +1,89 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.test.component.compress.estim.sample;
+
+import java.util.ArrayList;
+import java.util.Collection;
+
+import org.apache.sysds.runtime.compress.estim.sample.ShlosserEstimator;
+import org.junit.Test;
+import org.junit.runner.RunWith;
+import org.junit.runners.Parameterized;
+import org.junit.runners.Parameterized.Parameters;
+
+@RunWith(value = Parameterized.class)
+public class ShlosserEstimatorTest {
+
+       @Parameters
+       public static Collection<Object[]> data() {
+               ArrayList<Object[]> tests = new ArrayList<>();
+
+               final int m = Integer.MAX_VALUE;
+               tests.add(create(new int[] {0, 0, 0, 0, 0, m, m, m, m, m}));
+               tests.add(create(new int[] {m, m, m, m, m}));
+               tests.add(create(new int[] {m, m, m, m}));
+               tests.add(create(new int[] {m, m, m}));
+               tests.add(create(new int[] {m, m}));
+               tests.add(create(new int[] {m}));
+
+               final int l = Integer.MIN_VALUE;
+               tests.add(create(new int[] {l}));
+
+               tests.add(create(new int[] {1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 
0, 0, 0, 0, 0, 0, 0, m}));
+
+
+               tests.add(createHardBigSample(new int[]{1,0,0,0, m}));
+
+
+               tests.add(createHardSmallSample(new int[]{1,0,0,0, 99}));
+
+               return tests;
+       }
+
+       private static Object[] create(int[] frequencies) {
+               return new Object[] {frequencies, 100000L, 10000000, 9999999};
+       }
+
+       private static Object[] createHardBigSample(int[] frequencies) {
+               return new Object[] {frequencies, Long.MAX_VALUE, 
Long.MAX_VALUE, Long.MAX_VALUE-1};
+       }
+
+       private static Object[] createHardSmallSample(int[] frequencies) {
+               return new Object[] {frequencies, Long.MAX_VALUE, 
Long.MAX_VALUE, 100};
+       }
+
+       final long numVals;
+       final int[] freqCounts;
+       final long nRows;
+       final long sampleSize;
+
+       public ShlosserEstimatorTest(int[] freqCounts, long numVals, long 
nRows, long sampleSize) {
+               this.freqCounts = freqCounts;
+               this.numVals = numVals;
+               this.nRows = nRows;
+               this.sampleSize = sampleSize;
+       }
+
+       @Test
+       public void testWildEstimates() {
+               ShlosserEstimator.distinctCount(numVals, freqCounts, nRows, 
sampleSize);
+       }
+
+}
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java
 
b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java
index d1837c3681..4d9712efcb 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTests.java
@@ -80,7 +80,7 @@ public class MappingTests {
                this.seed = seed;
                this.type = type;
                this.size = size;
-               this.max = Math.min(MappingTestUtil.getUpperBoundValue(type), 
fictiveMax) + 1;
+               this.max 
=Math.min(Math.min(MappingTestUtil.getUpperBoundValue(type), fictiveMax) + 1, 
size);
                expected = new int[size];
                m = genMap(MapToFactory.create(size, max), expected, max, fill, 
seed);
        }
@@ -113,8 +113,11 @@ public class MappingTests {
                }
 
                // to make sure that the bit set is actually filled.
-               m.set(size - 1, max - 1);
-               expected[size - 1] = max - 1;
+               for(int i = 0; i < max; i++){
+
+                       m.set(i, i);
+                       expected[i] = i;
+               }
                return m;
        }
 
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java
 
b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java
index 6585e33926..e2a38db08d 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingTestsResize.java
@@ -19,6 +19,8 @@
 
 package org.apache.sysds.test.component.compress.mapping;
 
+import static org.junit.Assert.fail;
+
 import java.util.ArrayList;
 import java.util.Collection;
 
@@ -45,6 +47,7 @@ public class MappingTestsResize {
                ArrayList<Object[]> tests = new ArrayList<>();
                for(MAP_TYPE t : MAP_TYPE.values()) {
                        tests.add(new Object[] {1, t, 13, false});
+                       tests.add(new Object[] {1, t, 632, false});
                }
                return tests;
        }
@@ -53,10 +56,17 @@ public class MappingTestsResize {
                this.seed = seed;
                this.type = type;
                this.size = size;
-               final int max = MappingTestUtil.getUpperBoundValue(type);
-               final int maxSmaller = getMaxSmaller(type);
-               expected = new int[size];
-               m = MappingTests.genMap(MapToFactory.create(size, max), 
expected, maxSmaller, fill, seed);
+               try{
+
+                       final int max = 
Math.min(MappingTestUtil.getUpperBoundValue(type),size);
+                       final int maxSmaller = Math.min(getMaxSmaller(type), 
size);
+                       expected = new int[size];
+                       m = MappingTests.genMap(MapToFactory.create(size, max), 
expected, maxSmaller, fill, seed);
+               }
+               catch(Exception e){
+                       e.printStackTrace();
+                       fail("Failed creating mapping resize test");
+               }
        }
 
        @Test
diff --git 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
index e014cefa5d..157d9a642d 100644
--- 
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
+++ 
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
@@ -446,6 +446,15 @@ public class OffsetTests {
                }
        }
 
+       @Test
+       public void testIteratorToString(){
+               AOffsetIterator a = o.getOffsetIterator();
+               a.toString();
+
+               AIterator b = o.getIterator();
+               b.toString();
+       }
+
        protected static void compare(AOffset o, int[] v) {
                AIterator i = o.getIterator();
                if(v[0] != i.value())
diff --git 
a/src/test/java/org/apache/sysds/test/component/federated/FedWorkerMatrixMultiplyWorkload.java
 
b/src/test/java/org/apache/sysds/test/component/federated/FedWorkerMatrixMultiplyWorkload.java
index c37d7864b4..06a193368c 100644
--- 
a/src/test/java/org/apache/sysds/test/component/federated/FedWorkerMatrixMultiplyWorkload.java
+++ 
b/src/test/java/org/apache/sysds/test/component/federated/FedWorkerMatrixMultiplyWorkload.java
@@ -69,7 +69,7 @@ public class FedWorkerMatrixMultiplyWorkload extends 
FedWorkerBase {
        @Test
        public void verifySameOrAlsoCompressedAsLocalCompress() {
                // Local
-               final InstructionTypeCounter c = 
InstructionTypeCounter.MML(1000, 10);
+               final InstructionTypeCounter c =  new InstructionTypeCounter(0, 
0, 0, 1000, 0, 0, 0, 0, false);
                final MatrixBlock mbcLocal = 
CompressedMatrixBlockFactory.compress(mbr, c).getLeft();
                if(!(mbcLocal instanceof CompressedMatrixBlock))
                        return; // would not compress anyway so skip

Reply via email to