This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemds.git
commit f7b20d78cff111106ecc4248352efe39db7cefb0 Author: baunsgaard <[email protected]> AuthorDate: Tue May 11 21:15:12 2021 +0200 [SYSTEMDS-3001] CLA MM Left using existing kernels Compressed left multiplication have two phases, first pre aggregation then a matrix multiplication. This commit make the matrix multiplication use the default systemds kernels. This allows for exploitation of the various dedicated MM kernels already in SystemDS. --- src/main/java/org/apache/sysds/conf/DMLConfig.java | 2 +- .../runtime/compress/CompressionSettings.java | 5 +- .../compress/CompressionSettingsBuilder.java | 4 + .../sysds/runtime/compress/cocode/CoCodeCost.java | 27 +-- .../compress/cocode/CoCodeCostMatrixMult.java | 41 +++-- .../sysds/runtime/compress/colgroup/AColGroup.java | 34 ++-- .../compress/colgroup/ColGroupCompressed.java | 38 ---- .../runtime/compress/colgroup/ColGroupConst.java | 43 +++-- .../runtime/compress/colgroup/ColGroupDDC.java | 25 +++ .../runtime/compress/colgroup/ColGroupEmpty.java | 10 +- .../runtime/compress/colgroup/ColGroupFactory.java | 5 +- .../runtime/compress/colgroup/ColGroupOLE.java | 5 + .../runtime/compress/colgroup/ColGroupRLE.java | 5 + .../runtime/compress/colgroup/ColGroupSDC.java | 5 + .../compress/colgroup/ColGroupSDCSingle.java | 5 + .../compress/colgroup/ColGroupSDCSingleZeros.java | 5 + .../compress/colgroup/ColGroupSDCZeros.java | 9 +- .../runtime/compress/colgroup/ColGroupSizes.java | 2 +- .../compress/colgroup/ColGroupUncompressed.java | 37 ++-- .../runtime/compress/colgroup/ColGroupValue.java | 193 ++++++++++----------- .../compress/estim/CompressedSizeEstimator.java | 21 ++- .../estim/CompressedSizeEstimatorExact.java | 7 +- .../estim/CompressedSizeEstimatorFactory.java | 27 ++- .../estim/CompressedSizeEstimatorSample.java | 93 ++++------ .../compress/estim/CompressedSizeInfoColGroup.java | 14 +- .../runtime/compress/estim/EstimationFactors.java | 17 +- .../runtime/compress/lib/CLALibLeftMultBy.java | 83 ++++----- .../sysds/runtime/compress/utils/ABitmap.java | 21 ++- .../sysds/runtime/compress/utils/Bitmap.java | 2 +- .../compress/colgroup/JolEstimateOLETest.java | 6 +- .../compress/colgroup/JolEstimateTest.java | 22 ++- 31 files changed, 413 insertions(+), 400 deletions(-) diff --git a/src/main/java/org/apache/sysds/conf/DMLConfig.java b/src/main/java/org/apache/sysds/conf/DMLConfig.java index bf1767b..e978197 100644 --- a/src/main/java/org/apache/sysds/conf/DMLConfig.java +++ b/src/main/java/org/apache/sysds/conf/DMLConfig.java @@ -128,7 +128,7 @@ public class DMLConfig _defaultVals.put(COMPRESSED_LOSSY, "false" ); _defaultVals.put(COMPRESSED_VALID_COMPRESSIONS, "SDC,DDC"); _defaultVals.put(COMPRESSED_OVERLAPPING, "true" ); - _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "0.01"); + _defaultVals.put(COMPRESSED_SAMPLING_RATIO, "NaN"); _defaultVals.put(COMPRESSED_COCODE, "COST"); _defaultVals.put(COMPRESSED_TRANSPOSE, "auto"); _defaultVals.put(CODEGEN, "false" ); diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java index 7be74ef..ddbc60b 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettings.java @@ -126,8 +126,9 @@ public class CompressionSettings { sb.append("\n Lossy: " + lossy); sb.append("\n sortValuesByLength: " + sortValuesByLength); sb.append("\n column Partitioner: " + columnPartitioner); - sb.append("\n max Static ColGroup CoCode " + maxColGroupCoCode); - sb.append("\n max cocodePercentage " + coCodePercentage); + sb.append("\n Max Static ColGroup CoCode: " + maxColGroupCoCode); + sb.append("\n Max cocodePercentage: " + coCodePercentage); + sb.append("\n Sample Percentage: " + samplingRatio); // If needed for debugging add more fields to the printing. return sb.toString(); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java index 72aeeeb..216267e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java @@ -44,6 +44,8 @@ public class CompressionSettingsBuilder { private int maxStaticColGroupCoCode = 10000; private double coCodePercentage = 0.01; + private final static double defaultSampleRate = 0.01; + public CompressionSettingsBuilder() { DMLConfig conf = ConfigurationManager.getDMLConfig(); @@ -54,6 +56,8 @@ public class CompressionSettingsBuilder { validCompressions.add(CompressionType.valueOf(comp)); } samplingRatio = conf.getDoubleValue(DMLConfig.COMPRESSED_SAMPLING_RATIO); + if(Double.isNaN(samplingRatio)) + samplingRatio = defaultSampleRate; columnPartitioner = PartitionerType.valueOf(conf.getTextValue(DMLConfig.COMPRESSED_COCODE)); transposeInput = conf.getTextValue(DMLConfig.COMPRESSED_TRANSPOSE); diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java index d2aec2c..66ad209 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java +++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java @@ -26,7 +26,6 @@ import java.util.PriorityQueue; import java.util.Queue; import org.apache.sysds.runtime.compress.CompressionSettings; -import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimator; import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; @@ -67,12 +66,8 @@ public class CoCodeCost extends AColumnCoCoder { Queue<CompressedSizeInfoColGroup> que = new PriorityQueue<>(currentGroups.size(), comp); List<CompressedSizeInfoColGroup> ret = new ArrayList<>(); - for(CompressedSizeInfoColGroup g : currentGroups) { - if(g.getBestCompressionType() == CompressionType.CONST) - ret.add(g); - else - que.add(g); - } + for(CompressedSizeInfoColGroup g : currentGroups) + que.add(g); boolean finished = false; while(!finished) { @@ -83,31 +78,27 @@ public class CoCodeCost extends AColumnCoCoder { int worstCaseJoinedSize = l.getNumVals() * r.getNumVals(); if(worstCaseJoinedSize < toSmallForAnalysis) que.add(joinWithoutAnalysis(l, r)); - else if(worstCaseJoinedSize < largestDistinct){ - + else if(worstCaseJoinedSize < largestDistinct) { CompressedSizeInfoColGroup g = joinWithAnalysis(l, r); if(g.getNumVals() < largestDistinct) que.add(joinWithAnalysis(l, r)); - else{ - finished = true; - que.add(l); + else { + ret.add(l); que.add(r); } } else { - finished = true; - que.add(l); + ret.add(l); que.add(r); } } - else { - que.add(l); - finished = true; - } + else + ret.add(l); } else finished = true; } + for(CompressedSizeInfoColGroup g : que) ret.add(g); diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostMatrixMult.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostMatrixMult.java index 09a9990..910c94a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostMatrixMult.java +++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCostMatrixMult.java @@ -26,6 +26,7 @@ import java.util.PriorityQueue; import java.util.Queue; import org.apache.sysds.runtime.compress.CompressionSettings; +import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimator; import org.apache.sysds.runtime.compress.estim.CompressedSizeInfo; import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; @@ -40,20 +41,8 @@ import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; */ public class CoCodeCostMatrixMult extends AColumnCoCoder { - /** - * This value specifies the maximum distinct count allowed int a coCoded group. Note that this value is the number - * of distinct tuples not the total number of values. That value can be calculated by multiplying the number of - * tuples with columns in the coCoded group. - */ - private final int largestDistinct; - - private final int toSmallForAnalysis; - protected CoCodeCostMatrixMult(CompressedSizeEstimator e, CompressionSettings cs, int numRows) { super(e, cs, numRows); - largestDistinct = Math.max(256, (int) (_est.getNumRows() * _est.getNumColumns() * cs.coCodePercentage * 0.2)); - toSmallForAnalysis = Math.min(Math.max(256, largestDistinct / 4), 1028); - LOG.debug("CocodeCost largest Distinct: " + largestDistinct + " toSmallForAnalysis: " + toSmallForAnalysis); } @Override @@ -104,11 +93,31 @@ public class CoCodeCostMatrixMult extends AColumnCoCoder { protected CostOfJoin(CompressedSizeInfoColGroup elm) { this.elm = elm; - final int nRows = _est.getNumRows(); - final double commonFraction = elm.getMostCommonFraction(); - final double rowsToProcess = commonFraction > 0.2 ? nRows * (1 - Math.min(commonFraction, 0.95)) : nRows; - this.cost = rowsToProcess + elm.getNumVals() * elm.getColumns().length; + final double constantOverheadForColGroup = 5; + final double nCols = elm.getColumns().length; + final double nRows = _est.getNumRows(); + if(elm.getBestCompressionType() == CompressionType.UNCOMPRESSED) + this.cost = nRows * nCols * 2 + constantOverheadForColGroup; + else { + final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + + // LOG.error(constantOverheadForColGroup); + final double commonFraction = elm.getMostCommonFraction(); + final double rowsCost = commonFraction > 0.2 ? nRows * (1 - commonFraction) : nRows; + // this.cost = rowsToProcess + elm.getNumVals() * nCols + constantOverheadForColGroup; + // this.cost = rowsToProcess + elm.getNumVals() * nCols * (1 - commonFraction) + + // constantOverheadForColGroup; + // final double sparsity_tuple_effect = elm.getTupleSparsity() > 0.4 ? 1 - + // Math.min(elm.getTupleSparsity(), 0.9) : 1; + final int numberTuples = elm.getNumVals(); + final double tuplesCost = (numberTuples < blksz) ? numberTuples : numberTuples * 2; + + // this.cost = elementsCost; + // this.cost = rowsCost + tuplesCost * sparsity_tuple_effect + constantOverheadForColGroup; + + this.cost = rowsCost + tuplesCost + constantOverheadForColGroup; + } } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java index 8de274f..ddc6195 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java @@ -524,37 +524,33 @@ public abstract class AColGroup implements Serializable { /** * Left multiply with this column group * - * @param matrix The matrix to multiply with on the left - * @param result The result to output the values into, always dense for the purpose of the column groups - * parallelizing - * @param numCols The number of columns contained in the CompressedMatrixBlock that this column group is inside. + * @param matrix The matrix to multiply with on the left + * @param result The result to output the values into, always dense for the purpose of the column groups + * parallelizing */ - public void leftMultByMatrix(MatrixBlock matrix, double[] result, int numCols) { - leftMultByMatrix(matrix, result, numCols, 0, matrix.getNumRows()); + public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result) { + leftMultByMatrix(matrix, result, 0, matrix.getNumRows()); } /** * Left multiply with this column group. * - * @param matrix The matrix to multiply with on the left - * @param result The result to output the values into, always dense for the purpose of the column groups - * parallelizing - * @param numCols The number of columns contained in the CompressedMatrixBlock that this column group is inside. - * @param rl The row to begin the multiplication from - * @param ru The row to end the multiplication at. + * @param matrix The matrix to multiply with on the left + * @param result The result to output the values into, always dense for the purpose of the column groups + * parallelizing + * @param rl The row to begin the multiplication from on the lhs matrix + * @param ru The row to end the multiplication at on the lhs matrix */ - public abstract void leftMultByMatrix(MatrixBlock matrix, double[] result, int numCols, int rl, int ru); + public abstract void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, int rl, int ru); /** * Left side matrix multiplication with a column group that is transposed. * - * @param lhs The left hand side Column group to multiply with, the left hand side should be considered - * transposed. - * @param result The result matrix to insert the result of the multiplication into - * @param numRows The number of rows in the left hand side matrix - * @param numCols The number of columns in the right hand side matrix + * @param lhs The left hand side Column group to multiply with, the left hand side should be considered + * transposed. + * @param result The result matrix to insert the result of the multiplication into */ - public abstract void leftMultByAColGroup(AColGroup lhs, double[] result, int numRows, int numCols); + public abstract void leftMultByAColGroup(AColGroup lhs, MatrixBlock result); /** * Perform the specified scalar operation directly on the compressed column group, without decompressing individual diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java index c8f9a41..c5d29ba 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java @@ -20,7 +20,6 @@ package org.apache.sysds.runtime.compress.colgroup; import org.apache.sysds.runtime.DMLScriptException; -import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode; import org.apache.sysds.runtime.functionobjects.KahanPlus; @@ -30,7 +29,6 @@ import org.apache.sysds.runtime.functionobjects.Plus; import org.apache.sysds.runtime.functionobjects.ReduceAll; import org.apache.sysds.runtime.functionobjects.ReduceCol; import org.apache.sysds.runtime.functionobjects.ReduceRow; -import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.operators.AggregateUnaryOperator; /** @@ -89,42 +87,6 @@ public abstract class ColGroupCompressed extends AColGroup { protected abstract boolean sameIndexStructure(ColGroupCompressed that); - /** - * Multiply with a matrix on the left. - * - * @param matrix matrix to left multiply - * @param result matrix block result - * @param numRows The number of rows in the matrix input - * @param numCols The number of columns in the colGroups parent matrix. - * @param rl The row to start the matrix multiplication from - * @param ru The row to stop the matrix multiplication at. - */ - public abstract void leftMultByMatrix(double[] matrix, double[] result, int numRows, int numCols, int rl, int ru); - - /** - * Multiply with a sparse matrix on the left hand side, and add the values to the output result - * - * @param sb The sparse block to multiply with - * @param result The linearized output matrix - * @param numRows The number of rows in the left hand side input matrix (the sparse one) - * @param numCols The number of columns in the compression. - * @param rl The row to start the matrix multiplication from - * @param ru The row to stop the matrix multiplication at. - */ - public abstract void leftMultBySparseMatrix(SparseBlock sb, double[] result, int numRows, int numCols, int rl, - int ru); - - @Override - public final void leftMultByMatrix(MatrixBlock matrix, double[] result, int numCols, int rl, int ru) { - if(matrix.isEmpty()) - return; - else if(matrix.isInSparseFormat()) - leftMultBySparseMatrix(matrix.getSparseBlock(), result, matrix.getNumRows(), numCols, rl, ru); - else { - leftMultByMatrix(matrix.getDenseBlockValues(), result, matrix.getNumRows(), numCols, rl, ru); - } - } - @Override public final double getMin() { return computeMxx(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject(BuiltinCode.MIN)); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java index eb38ee0..8f6299c 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java @@ -210,23 +210,31 @@ public class ColGroupConst extends ColGroupValue { } @Override - public void leftMultByMatrix(double[] a, double[] c, double[] values, int numRows, int numCols, int rl, int ru) { - for(int i = rl; i < ru; i++) { - double preAggVals = preAggregateSingle(a, i); - int offC = i * numCols; - for(int j = 0; j < _colIndexes.length; j++) { - c[offC + _colIndexes[j]] += preAggVals * values[j]; + public void leftMultByMatrix(MatrixBlock a, MatrixBlock c, double[] values, int rl, int ru) { + final double[] cV = c.getDenseBlockValues(); + if(a.isEmpty()) + return; + else if(a.isInSparseFormat()) { + SparseBlock sb = a.getSparseBlock(); + for(int i = rl; i < ru; i++) { + + if(!sb.isEmpty(i)) { + double v = preAggregateSparseSingle(sb, i); + int offC = i * c.getNumColumns(); + for(int j = 0; j < _colIndexes.length; j++) + cV[offC + _colIndexes[j]] += v * values[j]; + + } } } - } + else { + double[] aV = a.getDenseBlockValues(); + for(int i = rl; i < ru; i++) { + double preAggVals = preAggregateSingle(aV, i); + int offC = i * c.getNumColumns(); + for(int j = 0; j < _colIndexes.length; j++) + cV[offC + _colIndexes[j]] += preAggVals * values[j]; - @Override - public void leftMultBySparseMatrix(SparseBlock sb, double[] c, double[] values, int numRows, int numCols, int row) { - if(!sb.isEmpty(row)) { - double v = preAggregateSparseSingle(sb, row); - int offC = row * numCols; - for(int j = 0; j < _colIndexes.length; j++) { - c[offC + _colIndexes[j]] += v * values[j]; } } } @@ -315,7 +323,7 @@ public class ColGroupConst extends ColGroupValue { } @Override - public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified){ + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified) { throw new DMLCompressionException("Does not make sense to call this"); } @@ -328,4 +336,9 @@ public class ColGroupConst extends ColGroupValue { protected boolean sameIndexStructure(ColGroupCompressed that) { return that instanceof ColGroupEmpty || that instanceof ColGroupConst; } + + @Override + public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) { + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index aa8a55f..df45f56 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -32,6 +32,8 @@ import org.apache.sysds.runtime.compress.colgroup.mapping.MapToFactory; import org.apache.sysds.runtime.compress.colgroup.offset.AIterator; import org.apache.sysds.runtime.compress.colgroup.pre.IPreAggregate; import org.apache.sysds.runtime.compress.colgroup.pre.PreAggregateFactory; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.DenseBlockFP64; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.matrix.data.MatrixBlock; @@ -231,6 +233,29 @@ public class ColGroupDDC extends ColGroupValue { } + @Override + public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) { + + final int retCols = getNumValues(); + final int retRows = ru - rl; + final double[] vals = allocDVector(retRows * retCols, true); + final DenseBlock retB = new DenseBlockFP64(new int[] {retRows, retCols}, vals); + final MatrixBlock ret = new MatrixBlock(retRows, retCols, retB); + + final double[] mV = m.getDenseBlockValues(); + + ret.setNonZeros(retRows * retCols); + for(int k = rl; k < ru; k++) { + final int offT = ret.getNumColumns() * k; + final int offM = m.getNumColumns() * k; + for(int i = 0; i < _numRows; i++) { + int index = _data.getIndex(i); + vals[offT + index] += mV[offM + i]; + } + } + return ret; + } + /** * Generic get value for byte-length-agnostic access to first column. * diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java index 2046d18..c873b1a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java @@ -20,7 +20,6 @@ package org.apache.sysds.runtime.compress.colgroup; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; -import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.operators.BinaryOperator; @@ -125,12 +124,7 @@ public class ColGroupEmpty extends ColGroupCompressed { } @Override - public void leftMultByMatrix(double[] a, double[] c, int numRows, int numCols, int rl, int ru) { - // do nothing. - } - - @Override - public void leftMultBySparseMatrix(SparseBlock sb, double[] c, int numRows, int numCols, int rl, int ru) { + public void leftMultByMatrix(MatrixBlock a, MatrixBlock c, int rl, int ru) { // do nothing. } @@ -214,7 +208,7 @@ public class ColGroupEmpty extends ColGroupCompressed { } @Override - public void leftMultByAColGroup(AColGroup lhs, double[] result, int numRows, int numCols) { + public void leftMultByAColGroup(AColGroup lhs, MatrixBlock c) { // do nothing } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java index f16204e..62acf71 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java @@ -191,10 +191,11 @@ public class ColGroupFactory { private static AColGroup compressColGroupForced(MatrixBlock in, int[] colIndexes, CompressionSettings compSettings) { + + ABitmap ubm = BitmapEncoder.extractBitmap(colIndexes, in, compSettings.transposed); - CompressedSizeEstimator estimator = new CompressedSizeEstimatorExact(in, compSettings, compSettings.transposed); + CompressedSizeEstimator estimator = new CompressedSizeEstimatorExact(in, compSettings); - ABitmap ubm = BitmapEncoder.extractBitmap(colIndexes, in, compSettings.transposed); CompressedSizeInfoColGroup sizeInfo = new CompressedSizeInfoColGroup( estimator.estimateCompressedColGroupSize(ubm, colIndexes), compSettings.validCompressions); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java index 8101f40..2df2878 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java @@ -1212,4 +1212,9 @@ public class ColGroupOLE extends ColGroupOffset { public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified){ throw new NotImplementedException(); } + + @Override + public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) { + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java index 1e79abc..1649591 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java @@ -1163,4 +1163,9 @@ public class ColGroupRLE extends ColGroupOffset { public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified){ throw new NotImplementedException(); } + + @Override + public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) { + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java index 268f294..e449259 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java @@ -744,4 +744,9 @@ public class ColGroupSDC extends ColGroupValue { return ret; } + @Override + public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) { + throw new NotImplementedException(); + } + } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java index 14afdf4..58649c3 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java @@ -669,4 +669,9 @@ public class ColGroupSDCSingle extends ColGroupValue { } } + + @Override + public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) { + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java index 34d148d..94be81c 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java @@ -453,4 +453,9 @@ public class ColGroupSDCSingleZeros extends ColGroupValue { public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified){ throw new NotImplementedException(); } + + @Override + public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) { + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java index 410ec90..23b06a9 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java @@ -534,9 +534,14 @@ public class ColGroupSDCZeros extends ColGroupValue { } return ret; } - + + @Override + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary re, boolean preModified) { + throw new NotImplementedException(); + } + @Override - public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary re, boolean preModified){ + public MatrixBlock preAggregate(MatrixBlock m, int rl, int ru) { throw new NotImplementedException(); } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java index f9bed0e..3c8c50a 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSizes.java @@ -124,7 +124,7 @@ public class ColGroupSizes { // Since the Object is a col group the overhead from the Memory Size group is added size += estimateInMemorySizeGroup(nrColumns); size += 8; // reference to MatrixBlock. - size += MatrixBlock.estimateSizeInMemory(nrRows, nrColumns, sparsity); + size += MatrixBlock.estimateSizeInMemory(nrRows, nrColumns, (nrColumns > 1) ? sparsity : 1); return size; } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java index a848031..3cec6fb 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java @@ -315,33 +315,34 @@ public class ColGroupUncompressed extends AColGroup { LibMatrixMult.matrixMult(_data, subMatrix, result); } - public void leftMultByMatrix(MatrixBlock matrix, double[] result, int numCols, int rl, int ru) { + public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, int rl, int ru) { final MatrixBlock tmpRet = new MatrixBlock(ru - rl, _data.getNumColumns(), false); tmpRet.allocateDenseBlock(); final MatrixBlock leftSlice = matrix.slice(rl, ru - 1, false); LibMatrixMult.matrixMult(leftSlice, _data, tmpRet); - int offT = numCols * rl; + int offT = result.getNumColumns() * rl; + final double[] resV = result.getDenseBlockValues(); if(tmpRet.isEmpty()) return; else if(tmpRet.isInSparseFormat()) { final SparseBlock sb = tmpRet.getSparseBlock(); - for(int rowIdx = 0; rowIdx < ru - rl; rowIdx++, offT += numCols) { + for(int rowIdx = 0; rowIdx < ru - rl; rowIdx++, offT += result.getNumColumns()) { if(!sb.isEmpty(rowIdx)) { final int apos = sb.pos(rowIdx); final int alen = sb.size(rowIdx) + apos; final int[] aix = sb.indexes(rowIdx); final double[] avals = sb.values(rowIdx); for(int col = apos; col < alen; col++) - result[offT + _colIndexes[aix[col]]] += avals[col]; + resV[offT + _colIndexes[aix[col]]] += avals[col]; } } } else { final double[] tmpRetV = tmpRet.getDenseBlockValues(); - for(int j = rl, offTemp = 0; j < ru; j++, offTemp += _colIndexes.length, offT += numCols) + for(int j = rl, offTemp = 0; j < ru; j++, offTemp += _colIndexes.length, offT += result.getNumColumns()) for(int i = 0; i < _colIndexes.length; i++) - result[offT + _colIndexes[i]] += tmpRetV[offTemp + i]; + resV[offT + _colIndexes[i]] += tmpRetV[offTemp + i]; } } @@ -565,7 +566,7 @@ public class ColGroupUncompressed extends AColGroup { } @Override - public void leftMultByAColGroup(AColGroup lhs, double[] result, final int numRows, final int numCols) { + public void leftMultByAColGroup(AColGroup lhs, MatrixBlock result) { if(lhs instanceof ColGroupEmpty) return; if(lhs instanceof ColGroupUncompressed) { @@ -587,18 +588,19 @@ public class ColGroupUncompressed extends AColGroup { LibMatrixMult.matrixMult(transposed, this._data, tmpRet); } + final double[] resV = result.getDenseBlockValues(); if(tmpRet.isEmpty()) return; else if(tmpRet.isInSparseFormat()) { SparseBlock sb = tmpRet.getSparseBlock(); - for(int rowIdx = 0, offT = 0; rowIdx < tmpRet.getNumRows(); rowIdx++, offT += numCols) { + for(int rowIdx = 0, offT = 0; rowIdx < tmpRet.getNumRows(); rowIdx++, offT += result.getNumColumns()) { if(!sb.isEmpty(rowIdx)) { final int apos = sb.pos(rowIdx); final int alen = sb.size(rowIdx) + apos; final int[] aix = sb.indexes(rowIdx); final double[] avals = sb.values(rowIdx); for(int col = apos; col < alen; col++) - result[offT + _colIndexes[aix[col]]] += avals[col]; + resV[offT + _colIndexes[aix[col]]] += avals[col]; } } } @@ -606,9 +608,9 @@ public class ColGroupUncompressed extends AColGroup { double[] tmpRetV = tmpRet.getDenseBlockValues(); for(int j = 0, offTemp = 0, offT = 0; j < tmpRet.getNumRows(); - j++, offTemp += _colIndexes.length, offT += numCols) { + j++, offTemp += _colIndexes.length, offT += result.getNumColumns()) { for(int i = 0; i < _colIndexes.length; i++) - result[offT + _colIndexes[i]] += tmpRetV[offTemp + i]; + resV[offT + _colIndexes[i]] += tmpRetV[offTemp + i]; } } @@ -618,16 +620,19 @@ public class ColGroupUncompressed extends AColGroup { LOG.warn("Inefficient transpose of uncompressed to fit to" + " t(AColGroup) %*% UncompressedColGroup mult by colGroup uncompressed column" + " Currently solved by t(t(Uncompressed) %*% AColGroup"); - double[] tmpTransposedResult = new double[result.length]; + MatrixBlock tmpTransposedResult = new MatrixBlock(result.getNumColumns(), result.getNumRows(), false); + tmpTransposedResult.allocateDenseBlock(); MatrixBlock ucCG = getData(); MatrixBlock tmp = new MatrixBlock(ucCG.getNumColumns(), ucCG.getNumRows(), ucCG.isInSparseFormat()); LibMatrixReorg.transpose(ucCG, tmp, InfrastructureAnalyzer.getLocalParallelism()); - lhs.leftMultByMatrix(tmp, tmpTransposedResult, numRows); + lhs.leftMultByMatrix(tmp, tmpTransposedResult); - for(int row = 0; row < numRows; row++) { - for(int col = 0; col < numCols; col++) { - result[row * numCols + col] += tmpTransposedResult[col * numRows + row]; + final double[] resV = result.getDenseBlockValues(); + final double[] tmpV = tmpTransposedResult.getDenseBlockValues(); + for(int row = 0; row < result.getNumRows(); row++) { + for(int col = 0; col < result.getNumColumns(); col++) { + resV[row * result.getNumColumns() + col] += tmpV[col * result.getNumRows() + row]; } } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java index dc52ca1..fc7da24 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java @@ -36,9 +36,12 @@ import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; import org.apache.sysds.runtime.compress.colgroup.pre.ArrPreAggregate; import org.apache.sysds.runtime.compress.colgroup.pre.IPreAggregate; import org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; +import org.apache.sysds.runtime.data.DenseBlock; +import org.apache.sysds.runtime.data.DenseBlockFP64; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; import org.apache.sysds.runtime.functionobjects.ValueFunction; +import org.apache.sysds.runtime.matrix.data.LibMatrixMult; import org.apache.sysds.runtime.matrix.data.LibMatrixReorg; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.matrix.operators.ScalarOperator; @@ -607,8 +610,8 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea * @param c The output matrix * @param numVals The number of values contained in the dictionary. */ - protected void postScaling(double[] dictValues, double[] vals, double[] c, int numVals) { - postScaling(dictValues, vals, c, numVals, 0, 0); + protected void postScaling(double[] dictValues, double[] vals, MatrixBlock c, int numVals) { + postScaling(dictValues, vals, c, numVals, 0); } /** @@ -621,41 +624,17 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea * @param row The row index in the output c to assign the result to. * @param totalCols The total number of columns in c. */ - protected void postScaling(double[] dictValues, double[] vals, double[] c, int numVals, int row, int totalCols) { + protected void postScaling(double[] dictValues, double[] vals, MatrixBlock c, int numVals, int row) { final int ncol = getNumCols(); int valOff = 0; + final double[] cv = c.getDenseBlockValues(); + final int totalCols = c.getNumColumns(); for(int k = 0; k < numVals; k++) { double aval = vals[k]; for(int j = 0; j < ncol; j++) { int colIx = _colIndexes[j] + row * totalCols; - c[colIx] += aval * dictValues[valOff++]; - } - } - } - - /** - * Post scale for left Multiplication - * - * @param dictValues The dictionary values materialized as double array. - * @param vals The values aggregated from the left side row vector. - * @param c The output matrix - * @param numVals The number of values contained in the dictionary. - * @param row The row index in the output c to assign the result to. - * @param totalCols The total number of columns in c. - * @param offT The offset into C to assign the values from usefull in case the c output is a smaller temporary - * array. - */ - protected void postScaling(double[] dictValues, double[] vals, double[] c, int numVals, int row, int totalCols, - int offT) { - final int nCol = getNumCols(); - int valOff = 0; - - for(int k = 0; k < numVals; k++) { - double aval = vals[k]; - for(int j = 0; j < nCol; j++) { - int colIx = _colIndexes[j] + row * totalCols; - c[offT + colIx] += aval * dictValues[valOff++]; + cv[colIx] += aval * dictValues[valOff++]; } } } @@ -670,6 +649,8 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea return preAggregate(a, 0); } + public abstract MatrixBlock preAggregate(MatrixBlock m, int rl, int ru); + /** * Pre aggregates for left multiplication * @@ -686,7 +667,14 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea * @return The pre-aggregated values. */ public double[] preAggregate(SparseBlock sb) { - return preAggregateSparse(sb, 0); + return preAggregateSparseWithCheck(sb, 0); + } + + private double[] preAggregateSparseWithCheck(SparseBlock sb, int row) { + if(sb != null && !sb.isEmpty(row)) + return preAggregateSparse(sb, row); + else + return null; } /** @@ -797,18 +785,18 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea } @Override - public void leftMultByAColGroup(AColGroup lhs, double[] result, final int numRows, final int numCols) { + public void leftMultByAColGroup(AColGroup lhs, MatrixBlock result) { if(lhs instanceof ColGroupEmpty) return; else if(lhs instanceof ColGroupValue) - leftMultByColGroupValue((ColGroupValue) lhs, result, numRows, numCols); + leftMultByColGroupValue((ColGroupValue) lhs, result); else if(lhs instanceof ColGroupUncompressed) { LOG.warn("Inefficient transpose of uncompressed to fit to " + "template need t(UnCompressedColGroup) %*% AColGroup support"); MatrixBlock ucCG = ((ColGroupUncompressed) lhs).getData(); MatrixBlock tmp = new MatrixBlock(ucCG.getNumColumns(), ucCG.getNumRows(), ucCG.isInSparseFormat()); LibMatrixReorg.transpose(ucCG, tmp, InfrastructureAnalyzer.getLocalParallelism()); - leftMultByMatrix(tmp, result, numCols); + leftMultByMatrix(tmp, result); } else @@ -816,35 +804,38 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea "Not supported left multiplication with A ColGroup of type: " + lhs.getClass().getSimpleName()); } - private void leftMultByColGroupValue(ColGroupValue lhs, double[] result, final int numRows, final int numCols) { + private void leftMultByColGroupValue(ColGroupValue lhs, MatrixBlock result) { final int nvL = lhs.getNumValues(); final int nvR = this.getNumValues(); final double[] lhValues = lhs.getValues(); final double[] rhValues = this.getValues(); final int lCol = lhs._colIndexes.length; final int rCol = this._colIndexes.length; + final double[] resV = result.getDenseBlockValues(); + final int numCols = result.getNumColumns(); final double threshold = 0.2; if(sameIndexStructure(lhs)) { int[] agI = getCounts(); for(int i = 0; i < agI.length; i++) { - for(int l = 0; l < lCol; l++) { - final int leftOff = lhs._colIndexes[l] * numCols; - final double lhV = lhValues[i * lCol + l] * agI[i]; - if(lhV != 0) - for(int r = 0; r < rCol; r++) { - final double rhV = rhValues[i * rCol + r]; - final double va = lhV * rhV; - result[leftOff + this._colIndexes[r]] += va; - } - } + if(i < nvL) + for(int l = 0; l < lCol; l++) { + final int leftOff = lhs._colIndexes[l] * numCols; + final double lhV = lhValues[i * lCol + l] * agI[i]; + if(lhV != 0) + for(int r = 0; r < rCol; r++) { + final double rhV = rhValues[i * rCol + r]; + final double va = lhV * rhV; + resV[leftOff + this._colIndexes[r]] += va; + } + } } } else if(lhs instanceof ColGroupConst || this instanceof ColGroupConst) { - // double[] r = this instanceof ColGroupConst ? rhValues : this._dict.colSum(getCounts(), rCol); - // double[] l = lhs instanceof ColGroupConst ? lhValues : lhs._dict.colSum(lhs.getCounts(), lCol); - // vectorVectorMultiply(l, lhs._colIndexes, r, this._colIndexes, result, numCols); + double[] r = this instanceof ColGroupConst ? rhValues : this._dict.colSum(getCounts(), rCol); + double[] l = lhs instanceof ColGroupConst ? lhValues : lhs._dict.colSum(lhs.getCounts(), lCol); + vectorVectorMultiply(l, lhs._colIndexes, r, this._colIndexes, resV, numCols); } else { int[] countsRight = getCounts(); @@ -858,36 +849,36 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea if(skipRight > threshold && percentageRight > percentageLeft && !(this instanceof ColGroupDDC)) { double[] mct = this._dict.getMostCommonTuple(this.getCounts(), rCol); - // double[] lhsSum = lhs._dict.colSum(lhs.getCounts(), lCol); - // if(mct != null) - // vectorVectorMultiply(lhsSum, lhs._colIndexes, mct, this._colIndexes, result, numCols); + double[] lhsSum = lhs._dict.colSum(lhs.getCounts(), lCol); + if(mct != null) + vectorVectorMultiply(lhsSum, lhs._colIndexes, mct, this._colIndexes, resV, numCols); ColGroupValue thisM = (mct != null) ? (ColGroupValue) this .copyAndSet(this._dict.subtractTuple(mct)) : this; Dictionary preAgg = lhs.preAggregateThatIndexStructure(thisM, true); matrixMultDictionariesAndOutputToColIndexes(lhValues, preAgg.getValues(), lhs._colIndexes, - this._colIndexes, result, numCols); + this._colIndexes, resV, numCols); } else if(skipLeft > threshold && !(lhs instanceof ColGroupDDC)) { double[] mct = lhs._dict.getMostCommonTuple(lhs.getCounts(), lCol); - // double[] thisColSum = this._dict.colSum(getCounts(), rCol); - // if(mct != null) - // vectorVectorMultiply(mct, lhs._colIndexes, thisColSum, this._colIndexes, result, numCols); + double[] thisColSum = this._dict.colSum(getCounts(), rCol); + if(mct != null) + vectorVectorMultiply(mct, lhs._colIndexes, thisColSum, this._colIndexes, resV, numCols); ColGroupValue lhsM = (mct != null) ? (ColGroupValue) lhs.copyAndSet(lhs._dict.subtractTuple(mct)) : lhs; Dictionary preAgg = this.preAggregateThatIndexStructure(lhsM, true); matrixMultDictionariesAndOutputToColIndexes(preAgg.getValues(), rhValues, lhs._colIndexes, - this._colIndexes, result, numCols); + this._colIndexes, resV, numCols); } else if(nvR * rCol < nvL * lCol) { Dictionary preAgg = lhs.preAggregateThatIndexStructure(this, false); matrixMultDictionariesAndOutputToColIndexes(lhValues, preAgg.getValues(), lhs._colIndexes, - this._colIndexes, result, numCols); + this._colIndexes, resV, numCols); } else { Dictionary preAgg = this.preAggregateThatIndexStructure(lhs, false); matrixMultDictionariesAndOutputToColIndexes(preAgg.getValues(), rhValues, lhs._colIndexes, - this._colIndexes, result, numCols); + this._colIndexes, resV, numCols); } } } @@ -1014,56 +1005,62 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea return !_zeros; } - @Override - public void leftMultBySparseMatrix(SparseBlock sb, double[] result, int numRows, int numCols, int rl, int ru) { - double[] values = getValues(); - for(int i = rl; i < ru; i++) { - leftMultBySparseMatrix(sb, result, values, numRows, numCols, i); - } - } - /** * Multiply with a matrix on the left. * - * @param matrix matrix to left multiply - * @param result matrix block result - * @param values The materialized values contained in the ColGroupValue - * @param numRows The number of rows in the matrix input - * @param numCols The number of columns in the colGroups parent matrix. - * @param rl The row to start the matrix multiplication from - * @param ru The row to stop the matrix multiplication at. + * @param matrix matrix to left multiply + * @param result matrix block result + * @param values The materialized values contained in the ColGroupValue + * @param rl The row to start the matrix multiplication from + * @param ru The row to stop the matrix multiplication at. */ - public void leftMultByMatrix(double[] matrix, double[] result, double[] values, int numRows, int numCols, int rl, - int ru) { - int numVals = getNumValues(); + public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, double[] values, int rl, int ru) { + final int numVals = getNumValues(); + + DenseBlock dictV = new DenseBlockFP64(new int[] {numVals, _colIndexes.length}, values); + MatrixBlock dictM = new MatrixBlock(numVals, _colIndexes.length, dictV); + dictM.examSparsity(); + MatrixBlock tmpRes = new MatrixBlock(1, _colIndexes.length, false); for(int i = rl; i < ru; i++) { - double[] vals = preAggregate(matrix, i); - postScaling(values, vals, result, numVals, i, numCols); + double[] vals = matrix.isInSparseFormat() ? preAggregateSparseWithCheck(matrix.getSparseBlock(), + i) : preAggregate(matrix.getDenseBlockValues(), i); + if(vals != null) { + DenseBlock preAggV = new DenseBlockFP64(new int[] {1, numVals}, vals); + MatrixBlock preAgg = new MatrixBlock(1, numVals, preAggV); + preAgg.setNonZeros(numVals); + // LOG.error("PreAgg Sparsity " + preAgg.getSparsity() + " nnz " + preAgg.getNonZeros()); + LibMatrixMult.matrixMult(preAgg, dictM, tmpRes); + addToResult(tmpRes, result, i); + tmpRes.reset(); + } } } - @Override - public void leftMultByMatrix(double[] matrix, double[] result, int numRows, int numCols, int rl, int ru) { - leftMultByMatrix(matrix, result, getValues(), numRows, numCols, rl, ru); + private void addToResult(MatrixBlock tmp, MatrixBlock result, int row) { + if(tmp.isEmpty()) + return; + else if(tmp.isInSparseFormat()) { + throw new NotImplementedException(); + } + else { + final double[] tmpV = tmp.getDenseBlockValues(); + final double[] retV = result.getDenseBlockValues(); + final int nColRet = result.getNumColumns(); + // final int nColTmp = tmp.getNumColumns(); + final int offR = row * nColRet; + // for(int row = rl, offT = 0, offR = rl * nColRet; row < ru; row++, offT += nColTmp, offR += nColRet) { + for(int col = 0; col < _colIndexes.length; col++) { + final int colOffset = _colIndexes[col]; + retV[offR + colOffset] += tmpV[col]; + } + // } + } + } - /** - * Multiply with a sparse matrix on the left hand side, and add the values to the output result - * - * @param sb The sparse block to multiply with - * @param result The linearized output matrix - * @param values The values contained in the dictionary, this parameter is here to allow materialization once. - * @param numRows The number of rows in the left hand side input matrix (the sparse one) - * @param numCols The number of columns in the compression. - * @param row The row index of the sparse row to multiply with. - */ - public void leftMultBySparseMatrix(SparseBlock sb, double[] result, double[] values, int numRows, int numCols, - int row) { - if(!sb.isEmpty(row)) { - final int numVals = getNumValues(); - double[] vals = preAggregateSparse(sb, row); - postScaling(values, vals, result, numVals, row, numCols); - } + @Override + public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result, int rl, int ru) { + leftMultByMatrix(matrix, result, getValues(), rl, ru); } public AColGroup rightMultByMatrix(MatrixBlock right) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java index 4902cb8..d4b2a63 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimator.java @@ -48,7 +48,7 @@ public abstract class CompressedSizeEstimator { /** The number of columns in the matrix block, extracted to a field because the matrix could be transposed */ final protected int _numCols; /** The compression settings to use, for estimating the size, and compress the ColGroups. */ - final protected CompressionSettings _compSettings; + final protected CompressionSettings _cs; /** * Boolean specifying if the _data is in transposed format. This is used to select the correct readers for the @@ -61,23 +61,22 @@ public abstract class CompressedSizeEstimator { * * protected because the factory should be used to construct the CompressedSizeEstimator * - * @param data The matrix block to extract information from - * @param compSettings The Compression settings used. + * @param data The matrix block to extract information from + * @param cs The Compression settings used. */ - protected CompressedSizeEstimator(MatrixBlock data, CompressionSettings compSettings, boolean transposed) { + protected CompressedSizeEstimator(MatrixBlock data, CompressionSettings cs) { _data = data; - _transposed = transposed; + _transposed = cs.transposed; _numRows = _transposed ? _data.getNumColumns() : _data.getNumRows(); _numCols = _transposed ? _data.getNumRows() : _data.getNumColumns(); - _compSettings = compSettings; + _cs = cs; } - - public int getNumRows(){ + public int getNumRows() { return _numRows; } - public int getNumColumns(){ + public int getNumColumns() { return _numCols; } @@ -124,8 +123,8 @@ public abstract class CompressedSizeEstimator { * @return The size factors estimated from the Bit Map. */ public EstimationFactors estimateCompressedColGroupSize(ABitmap ubm, int[] colIndexes) { - return EstimationFactors.computeSizeEstimationFactors(ubm, - _compSettings.validCompressions.contains(CompressionType.RLE), _numRows, colIndexes); + return EstimationFactors.computeSizeEstimationFactors(ubm, _cs.validCompressions.contains(CompressionType.RLE), + _numRows, colIndexes); } private CompressedSizeInfoColGroup[] CompressedSizeInfoColGroup(int clen) { diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java index a118722..40677e9 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorExact.java @@ -29,16 +29,17 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock; */ public class CompressedSizeEstimatorExact extends CompressedSizeEstimator { - public CompressedSizeEstimatorExact(MatrixBlock data, CompressionSettings compSettings, boolean transposed) { - super(data, compSettings, transposed); + public CompressedSizeEstimatorExact(MatrixBlock data, CompressionSettings compSettings) { + super(data, compSettings); } @Override public CompressedSizeInfoColGroup estimateCompressedColGroupSize(int[] colIndexes) { ABitmap entireBitMap = BitmapEncoder.extractBitmap(colIndexes, _data, _transposed); return new CompressedSizeInfoColGroup(estimateCompressedColGroupSize(entireBitMap, colIndexes), - _compSettings.validCompressions); + _cs.validCompressions); } + @Override public String toString() { StringBuilder sb = new StringBuilder(); diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java index a886eae..cf5437e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorFactory.java @@ -29,26 +29,19 @@ public class CompressedSizeEstimatorFactory { public static final int minimumSampleSize = 2000; - public static CompressedSizeEstimator getSizeEstimator(MatrixBlock data, CompressionSettings compSettings) { - - MatrixBlock shallowCopy = new MatrixBlock().copyShallow(data); - long elements = compSettings.transposed ? data.getNumColumns() : data.getNumRows(); - elements = data.getNonZeros() / (compSettings.transposed ? data.getNumRows() : data.getNumColumns()); - CompressedSizeEstimator est; + public static CompressedSizeEstimator getSizeEstimator(MatrixBlock data, CompressionSettings cs) { + final long nRows = cs.transposed ? data.getNumColumns() : data.getNumRows(); + // Calculate the sample size. // If the sample size is very small, set it to the minimum size - int sampleSize = Math.max((int) Math.ceil(elements * compSettings.samplingRatio), minimumSampleSize); - if(compSettings.samplingRatio >= 1.0 || elements < minimumSampleSize || sampleSize > elements) { - est = new CompressedSizeEstimatorExact(shallowCopy, compSettings, compSettings.transposed); - } - else { - int[] sampleRows = CompressedSizeEstimatorSample.getSortedUniformSample( - compSettings.transposed ? data.getNumColumns() : data.getNumRows(), - sampleSize, - compSettings.seed); - est = new CompressedSizeEstimatorSample(shallowCopy, compSettings, sampleRows, compSettings.transposed); - } + final int sampleSize = Math.max((int) Math.ceil(nRows * cs.samplingRatio), minimumSampleSize); + + CompressedSizeEstimator est; + if(cs.samplingRatio >= 1.0 || nRows < minimumSampleSize || sampleSize > nRows) + est = new CompressedSizeEstimatorExact(data, cs); + else + est = new CompressedSizeEstimatorSample(data, cs, sampleSize); LOG.debug("Estimating using: " + est); return est; diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java index af94473..3098394 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeEstimatorSample.java @@ -38,61 +38,48 @@ import org.apache.sysds.runtime.util.UtilFunctions; public class CompressedSizeEstimatorSample extends CompressedSizeEstimator { - private static final int FORCE_TRANSPOSE_ON_SAMPLE_THRESHOLD = 8000; - private final int[] _sampleRows; - private final MatrixBlock _sample; private HashMap<Integer, Double> _solveCache = null; /** * CompressedSizeEstimatorSample, samples from the input data and estimates the size of the compressed matrix. * - * @param data The input data toSample from - * @param compSettings The Settings used for the sampling, and compression, contains information such as seed. - * @param sampleRows The rows sampled - * @param transposed Boolean specifying if the input is already transposed. + * @param data The input data toSample from + * @param cs The Settings used for the sampling, and compression, contains information such as seed. + * @param sampleSize The size to sample from the data. */ - public CompressedSizeEstimatorSample(MatrixBlock data, CompressionSettings compSettings, int[] sampleRows, - boolean transposed) { - super(data, compSettings, transposed); - _sampleRows = sampleRows; + public CompressedSizeEstimatorSample(MatrixBlock data, CompressionSettings cs, int sampleSize) { + super(data, cs); + _sampleRows = CompressedSizeEstimatorSample.getSortedUniformSample(_numRows, sampleSize, _cs.seed); _solveCache = new HashMap<>(); - _sample = sampleData(data, compSettings, sampleRows, transposed); + _sample = sampleData(); } - protected MatrixBlock sampleData(MatrixBlock data, CompressionSettings compSettings, int[] sampleRows, - boolean transposed) { + protected MatrixBlock sampleData() { MatrixBlock sampledMatrixBlock; - if(data.isInSparseFormat() && !transposed) { - sampledMatrixBlock = new MatrixBlock(sampleRows.length, data.getNumColumns(), true); - SparseRow[] rows = new SparseRow[sampleRows.length]; - SparseBlock in = data.getSparseBlock(); - for(int i = 0; i < sampleRows.length; i++) { - rows[i] = in.get(sampleRows[i]); - } + if(_data.isInSparseFormat() && !_cs.transposed) { + sampledMatrixBlock = new MatrixBlock(_sampleRows.length, _data.getNumColumns(), true); + SparseRow[] rows = new SparseRow[_sampleRows.length]; + SparseBlock in = _data.getSparseBlock(); + for(int i = 0; i < _sampleRows.length; i++) + rows[i] = in.get(_sampleRows[i]); + sampledMatrixBlock.setSparseBlock(new SparseBlockMCSR(rows, false)); sampledMatrixBlock.recomputeNonZeros(); _transposed = true; sampledMatrixBlock = LibMatrixReorg.transposeInPlace(sampledMatrixBlock, 16); } else { - MatrixBlock select = (transposed) ? new MatrixBlock(data.getNumColumns(), 1, - true) : new MatrixBlock(data.getNumRows(), 1, true); - for(int i = 0; i < sampleRows.length; i++) - select.appendValue(sampleRows[i], 0, 1); + MatrixBlock select = (_cs.transposed) ? new MatrixBlock(_data.getNumColumns(), 1, + false) : new MatrixBlock(_data.getNumRows(), 1, false); + for(int i = 0; i < _sampleRows.length; i++) + select.appendValue(_sampleRows[i], 0, 1); - sampledMatrixBlock = data.removeEmptyOperations(new MatrixBlock(), !transposed, true, select); + sampledMatrixBlock = _data.removeEmptyOperations(new MatrixBlock(), !_cs.transposed, true, select); } - if(!transposed && sampledMatrixBlock.isInSparseFormat() && - sampleRows.length > FORCE_TRANSPOSE_ON_SAMPLE_THRESHOLD) { - _transposed = true; - sampledMatrixBlock = LibMatrixReorg.transpose(sampledMatrixBlock, - new MatrixBlock(sampleRows.length, data.getNumRows(), true), 1); - } - if(sampledMatrixBlock.isEmpty()) throw new DMLCompressionException("Empty sample block"); @@ -104,7 +91,7 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator { public CompressedSizeInfoColGroup estimateCompressedColGroupSize(int[] colIndexes) { final int sampleSize = _sampleRows.length; // final int numCols = colIndexes.length; - + final double scalingFactor = ((double) _numRows / sampleSize); // extract statistics from sample final ABitmap ubm = BitmapEncoder.extractBitmap(colIndexes, _sample, _transposed); final EstimationFactors fact = EstimationFactors.computeSizeEstimationFactors(ubm, false, _numRows, colIndexes); @@ -114,50 +101,44 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator { if(numZerosInSample == sampleSize || ubm.getOffsetList() == null) { // Since we sample, and this column seems to be empty, we set the return to 1 value detected. // aka 1 value, and 1 offset. - // This makes it more robust in the co coding of Columns - return new CompressedSizeInfoColGroup( - new EstimationFactors(colIndexes, 1, 1, _numRows - 1, 2, 1, _numRows, lossy, true, 0, 0), - _compSettings.validCompressions); + // This makes it more robust in the coCoding of Columns + return new CompressedSizeInfoColGroup(new EstimationFactors(colIndexes, 1, 1, _numRows - 1, 2, 1, _numRows, + lossy, true, (double) 1 / _numRows, (double) 1 / ubm.getNumColumns()), _cs.validCompressions); } - // estimate number of distinct values (incl fixes for anomalies w/ large sample fraction) + // Estimate number of distinct values (incl fixes for anomalies w/ large sample fraction) int totalCardinality = getNumDistinctValues(ubm, _numRows, sampleSize, _solveCache); - totalCardinality = Math.max(totalCardinality, fact.numVals); - totalCardinality = lossy ? Math.min(totalCardinality, 256) : totalCardinality; - totalCardinality = Math.min(totalCardinality, _numRows); - - // Number of unseen values - // int unseenVals = totalCardinality - fact.numVals; + // Number of unique is trivially bounded by the sampled number of uniques and the number of rows. + totalCardinality = Math.min(Math.max(totalCardinality, fact.numVals), _numRows); // estimate number of non-zeros (conservatively round up) final double C = Math.max(1 - (double) fact.numSingle / sampleSize, (double) sampleSize / _numRows); - final int numNonZeros = Math.max( - (int) Math.floor(_numRows - (double) (_numRows / sampleSize) * C * numZerosInSample), totalCardinality); + final int numNonZeros = Math.max((int) Math.floor(_numRows - scalingFactor * C * numZerosInSample), + totalCardinality); // estimate number of segments and number of runs incl correction for // empty segments and empty runs (via expected mean of offset value) // int numUnseenSeg = (int) (unseenVals * Math.ceil((double) _numRows / BitmapEncoder.BITMAP_BLOCK_SZ / 2)); - final int totalNumRuns = _compSettings.validCompressions.contains(CompressionType.RLE) && + final int totalNumRuns = _cs.validCompressions.contains(CompressionType.RLE) && ubm.getNumValues() > 0 ? getNumRuns(ubm, sampleSize, _numRows, _sampleRows) : 0; // Largest instance count ... initiate as the number of zeros. int largestInstanceCount = numZerosInSample; - for(IntArrayList a : ubm.getOffsetList()) { + for(IntArrayList a : ubm.getOffsetList()) if(a.size() > largestInstanceCount) largestInstanceCount = a.size(); - } + final boolean zeroIsMostFrequent = largestInstanceCount == numZerosInSample; // Scale largest Instance count to correctly reflect the number of instances. - largestInstanceCount = (int) (((double) _numRows / sampleSize) * largestInstanceCount); - + largestInstanceCount = (int) (scalingFactor * largestInstanceCount); EstimationFactors totalFacts = new EstimationFactors(colIndexes, totalCardinality, numNonZeros, - largestInstanceCount, totalNumRuns, fact.numSingle, _numRows, lossy, zeroIsMostFrequent, fact.tupleSparsity, - fact.overAllSparsity); + largestInstanceCount, totalNumRuns, fact.numSingle, _numRows, lossy, zeroIsMostFrequent, + fact.overAllSparsity, fact.tupleSparsity); // construct new size info summary - return new CompressedSizeInfoColGroup(totalFacts, _compSettings.validCompressions); + return new CompressedSizeInfoColGroup(totalFacts, _cs.validCompressions); } private static int getNumDistinctValues(ABitmap ubm, int numRows, int sampleSize, @@ -330,7 +311,7 @@ public class CompressedSizeEstimatorSample extends CompressedSizeEstimator { * @param smplSize sample size * @return sorted array of integers */ - protected static int[] getSortedUniformSample(int range, int smplSize, long seed) { + private static int[] getSortedUniformSample(int range, int smplSize, long seed) { return UtilFunctions.getSortedSampleIndexes(range, smplSize, seed); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java index d25f280..50aeb7e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/CompressedSizeInfoColGroup.java @@ -80,7 +80,8 @@ public class CompressedSizeInfoColGroup { _bestCompressionType = bestEntry.getKey(); _minSize = bestEntry.getValue(); - // LOG.error(this); + if(LOG.isTraceEnabled()) + LOG.trace(this); } /** @@ -98,7 +99,6 @@ public class CompressedSizeInfoColGroup { Set<CompressionType> validCompressionTypes) { EstimationFactors fact = new EstimationFactors(columns, oneSide._facts); CompressedSizeInfoColGroup ret = new CompressedSizeInfoColGroup(fact, validCompressionTypes); - // LOG.error(ret); return ret; } @@ -144,11 +144,11 @@ public class CompressedSizeInfoColGroup { return _cardinalityRatio; } - public double getMostCommonFraction(){ + public double getMostCommonFraction() { return (double) _facts.largestOff / _facts.numRows; } - public double getTupleSparsity(){ + public double getTupleSparsity() { return _facts.tupleSparsity; } @@ -215,13 +215,13 @@ public class CompressedSizeInfoColGroup { public String toString() { StringBuilder sb = new StringBuilder(); sb.append("Best Type: " + _bestCompressionType); - sb.append(" facts: " + _facts); sb.append(" Cardinality: "); sb.append(_cardinalityRatio); + sb.append(" mostCommonFraction: "); + sb.append(getMostCommonFraction()); sb.append(" Sizes: "); sb.append(_sizes); - - sb.append("\n"); + sb.append(" facts: " + _facts); return sb.toString(); } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java b/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java index 350d9c4..b42e211 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java +++ b/src/main/java/org/apache/sysds/runtime/compress/estim/EstimationFactors.java @@ -23,6 +23,7 @@ import java.util.Arrays; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; +import org.apache.sysds.runtime.DMLCompressionException; import org.apache.sysds.runtime.compress.CompressionSettings; import org.apache.sysds.runtime.compress.utils.ABitmap; import org.apache.sysds.runtime.compress.utils.ABitmap.BitmapType; @@ -102,6 +103,18 @@ public class EstimationFactors { this.containNoZeroValues = numOffs == numRows; this.overAllSparsity = overAllSparsity; this.tupleSparsity = tupleSparsity; + + if(!containNoZeroValues && overAllSparsity >= 1) + throw new DMLCompressionException( + "Invalid Sparsity, if there is zeroOffsets, then the sparsity should be below 1"); + if(overAllSparsity > 1 || overAllSparsity < 0) + throw new DMLCompressionException("Invalid sparsity"); + if(tupleSparsity > 1 || tupleSparsity < 0) + throw new DMLCompressionException("Invalid sparsity"); + if(largestOff > numRows) + throw new DMLCompressionException( + "Invalid number of instance of most common element should be lower than number of rows. " + largestOff + + " > numRows: " + numRows); } protected static EstimationFactors computeSizeEstimationFactors(ABitmap ubm, boolean inclRLE, int numRows, @@ -155,8 +168,7 @@ public class EstimationFactors { @Override public String toString() { StringBuilder sb = new StringBuilder(); - sb.append("\nrows:" + numRows); - sb.append(" cols:" + Arrays.toString(cols)); + sb.append(" rows:" + numRows); sb.append(" num Offsets:" + numOffs); sb.append(" LargestOffset:" + largestOff); sb.append(" num Singles:" + numSingle); @@ -164,6 +176,7 @@ public class EstimationFactors { sb.append(" num Unique Vals:" + numVals); sb.append(" overallSparsity:" + overAllSparsity); sb.append(" tupleSparsity:" + tupleSparsity); + sb.append(" cols:" + Arrays.toString(cols)); return sb.toString(); } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java index 2779a68..9c73f7c 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java @@ -109,8 +109,7 @@ public class CLALibLeftMultBy { if(k <= 1) { for(int i = 0; i < groups.size(); i++) - leftMultByCompressedTransposedMatrix(groups.get(i), groups, result.getDenseBlockValues(), - result.getNumRows(), result.getNumColumns(), i, groups.size()); + leftMultByCompressedTransposedMatrix(groups.get(i), groups, result, i, groups.size()); } else { try { @@ -213,8 +212,7 @@ public class CLALibLeftMultBy { public Object call() { try { IPreAggregate.setupThreadLocalMemory(1024); - leftMultByCompressedTransposedMatrix(_left, _groups, _ret.getDenseBlockValues(), _ret.getNumRows(), - _ret.getNumColumns(), _start, _end); + leftMultByCompressedTransposedMatrix(_left, _groups, _ret, _start, _end); } catch(Exception e) { @@ -227,19 +225,15 @@ public class CLALibLeftMultBy { private static void leftMultByCompressedTransposedMatrix(List<AColGroup> thisCG, List<AColGroup> thatCG, MatrixBlock ret) { - double[] c = ret.getDenseBlockValues(); - for(AColGroup lhs : thatCG) { - leftMultByCompressedTransposedMatrix(lhs, thisCG, c, ret.getNumRows(), ret.getNumColumns(), 0, - thisCG.size()); - } + for(AColGroup lhs : thatCG) + leftMultByCompressedTransposedMatrix(lhs, thisCG, ret, 0, thisCG.size()); } - private static void leftMultByCompressedTransposedMatrix(AColGroup lhs, List<AColGroup> thisCG, double[] c, - int rows, int cols, int colGroupStart, int colGroupEnd) { + private static void leftMultByCompressedTransposedMatrix(AColGroup lhs, List<AColGroup> thisCG, MatrixBlock ret, + int colGroupStart, int colGroupEnd) { - for(; colGroupStart < colGroupEnd; colGroupStart++) { - thisCG.get(colGroupStart).leftMultByAColGroup(lhs, c, rows, cols); - } + for(; colGroupStart < colGroupEnd; colGroupStart++) + thisCG.get(colGroupStart).leftMultByAColGroup(lhs, ret); } @@ -252,29 +246,34 @@ public class CLALibLeftMultBy { } ret.allocateDenseBlock(); - double[] retV = ret.getDenseBlockValues(); if(k == 1) for(int j = 0; j < colGroups.size(); j++) - colGroups.get(j).leftMultByMatrix(that, retV, numColumns); + colGroups.get(j).leftMultByMatrix(that, ret); else { try { ExecutorService pool = CommonThreadPool.get(k); // compute remaining compressed column groups in parallel ArrayList<Callable<Object>> tasks = new ArrayList<>(); - int rowBlockSize = 1; + int rowBlockSize = 8; if(overlapping) { for(int blo = 0; blo < that.getNumRows(); blo += rowBlockSize) { - tasks.add(new LeftMatrixMatrixMultTask(colGroups, that, retV, numColumns, blo, + tasks.add(new LeftMatrixMatrixMultTask(colGroups, that, ret, blo, Math.min(blo + rowBlockSize, that.getNumRows()), v)); } } else { - for(int blo = 0; blo < that.getNumRows(); blo += rowBlockSize) { - for(AColGroup g : colGroups) { - tasks.add(new LeftMatrixColGroupMultTask(g, that, retV, numColumns, blo, - Math.min(blo + rowBlockSize, that.getNumRows()), v)); - } + for(AColGroup g : colGroups) { + // if(g instanceof ColGroupDDC) { + // tasks.add(new LeftMatrixColGroupMultTask(g, that, ret, 0, that.getNumRows(), v)); + // } + // else { + + for(int blo = 0; blo < that.getNumRows(); blo += rowBlockSize) { + tasks.add(new LeftMatrixColGroupMultTask(g, that, ret, blo, + Math.min(blo + rowBlockSize, that.getNumRows()), v)); + } + // } } } @@ -364,9 +363,9 @@ public class CLALibLeftMultBy { for(int j = rl; j < ru; j++) { AColGroup.decompressColumnToBlock(lhs, j, thatGroups); if(!lhs.isEmptyBlock(false)) { - for(AColGroup grp : thisGroups) { - grp.leftMultByMatrix(lhs, tmpret.getDenseBlockValues(), result.getNumColumns()); - } + for(AColGroup grp : thisGroups) + grp.leftMultByMatrix(lhs, tmpret); + double[] tmpRetValues = tmpret.getDenseBlockValues(); double[] resultValues = result.getDenseBlockValues(); int offset = tmpret.getNumColumns() * j; @@ -383,18 +382,16 @@ public class CLALibLeftMultBy { private static class LeftMatrixMatrixMultTask implements Callable<Object> { private final List<AColGroup> _group; private final MatrixBlock _that; - private final double[] _ret; - private final int _numCols; + private final MatrixBlock _ret; private final int _rl; private final int _ru; private final Pair<Integer, int[]> _v; - protected LeftMatrixMatrixMultTask(List<AColGroup> group, MatrixBlock that, double[] ret, int numCols, int rl, - int ru, Pair<Integer, int[]> v) { + protected LeftMatrixMatrixMultTask(List<AColGroup> group, MatrixBlock that, MatrixBlock ret, int rl, int ru, + Pair<Integer, int[]> v) { _group = group; _that = that; _ret = ret; - _numCols = numCols; _rl = rl; _ru = ru; _v = v; @@ -402,16 +399,10 @@ public class CLALibLeftMultBy { @Override public Object call() { - // setup memory pool for reuse - - double[][] materialized = new double[_group.size()][]; - for(int i = 0; i < _group.size(); i++) { - materialized[i] = _group.get(i).getValues(); - } try { - ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1); + ColGroupValue.setupThreadLocalMemory(_v.getLeft() * (_ru - _rl)); for(int j = 0; j < _group.size(); j++) - _group.get(j).leftMultByMatrix(_that, _ret, _numCols, _rl, _ru); + _group.get(j).leftMultByMatrix(_that, _ret, _rl, _ru); } catch(Exception e) { throw new DMLRuntimeException(e); @@ -423,18 +414,16 @@ public class CLALibLeftMultBy { private static class LeftMatrixColGroupMultTask implements Callable<Object> { private final AColGroup _group; private final MatrixBlock _that; - private final double[] _ret; - private final int _numCols; + private final MatrixBlock _ret; private final int _rl; private final int _ru; private final Pair<Integer, int[]> _v; - protected LeftMatrixColGroupMultTask(AColGroup group, MatrixBlock that, double[] ret, int numCols, int rl, - int ru, Pair<Integer, int[]> v) { + protected LeftMatrixColGroupMultTask(AColGroup group, MatrixBlock that, MatrixBlock ret, int rl, int ru, + Pair<Integer, int[]> v) { _group = group; _that = that; _ret = ret; - _numCols = numCols; _rl = rl; _ru = ru; _v = v; @@ -444,8 +433,8 @@ public class CLALibLeftMultBy { public Object call() { try { - ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1); - _group.leftMultByMatrix(_that, _ret, _numCols, _rl, _ru); + ColGroupValue.setupThreadLocalMemory(_v.getLeft() * (_ru - _rl)); + _group.leftMultByMatrix(_that, _ret, _rl, _ru); } catch(Exception e) { throw new DMLRuntimeException(e); @@ -510,7 +499,7 @@ public class CLALibLeftMultBy { public Object call() { ColGroupValue.setupThreadLocalMemory(_v.getLeft() + 1); for(int i = _gl; i < _gu; i++) { - _grps.get(i).leftMultByMatrix(_rowVector, _result.getDenseBlockValues(), _result.getNumColumns()); + _grps.get(i).leftMultByMatrix(_rowVector, _result); } return null; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/ABitmap.java b/src/main/java/org/apache/sysds/runtime/compress/utils/ABitmap.java index 49e5712..69573a4 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/utils/ABitmap.java +++ b/src/main/java/org/apache/sysds/runtime/compress/utils/ABitmap.java @@ -37,24 +37,23 @@ public abstract class ABitmap { /** Bitmaps (as lists of offsets) for each of the values. */ protected IntArrayList[] _offsetsLists; - /** int specifying the number of zero value groups contained in the rows. */ + /** int specifying the number of zero value tuples contained in the rows. */ protected final int _numZeros; public ABitmap(int numCols, IntArrayList[] offsetsLists, int rows) { _numCols = numCols; int offsetsTotal = 0; - if(offsetsLists != null){ - for(IntArrayList a: offsetsLists){ + if(offsetsLists != null) { + for(IntArrayList a : offsetsLists) offsetsTotal += a.size(); - } + _numZeros = rows - offsetsTotal; - if(_numZeros < 0){ + if(_numZeros < 0) throw new DMLCompressionException("Error in constructing bitmap"); - } } - else{ + else _numZeros = rows; - } + _offsetsLists = offsetsLists; } @@ -85,6 +84,12 @@ public abstract class ABitmap { return ret; } + /** + * Get the number of offsets for a specific unique offset. + * + * @param ix The offset index. + * @return The number of offsets for this unique value. + */ public int getNumOffsets(int ix) { return _offsetsLists[ix].size(); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java b/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java index 693cf25..09354f6 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java +++ b/src/main/java/org/apache/sysds/runtime/compress/utils/Bitmap.java @@ -60,7 +60,7 @@ public final class Bitmap extends ABitmap { public int getNumNonZerosInOffset(int idx){ if(_numCols == 1) - return _offsetsLists[idx].size(); + return _values[0] != 0 ? 1 : 0; int nz = 0; for(int i = idx * _numCols; i < (idx+1) * _numCols; i++) nz += _values[i] == 0 ? 0 : 1; diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java index 5602ade..a76b2f1 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateOLETest.java @@ -76,7 +76,7 @@ public class JolEstimateOLETest extends JolEstimateTest { // Random rounded numbers dense mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1523, 0, 99, 1.0, 7))); tests.add(new Object[] {mb}); - mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 255, 1.0, 7))); + mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 100, 1.0, 7))); tests.add(new Object[] {mb}); // Sparse rounded numbers @@ -88,7 +88,7 @@ public class JolEstimateOLETest extends JolEstimateTest { mb = DataConverter .convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 2321, 0, 99, 0.1, 512))); tests.add(new Object[] {mb}); - mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 255, 0.1, 7))); + mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 100, 0.1, 7))); tests.add(new Object[] {mb}); mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 1523, 0, 99, 0.5, 7))); @@ -99,7 +99,7 @@ public class JolEstimateOLETest extends JolEstimateTest { mb = DataConverter .convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 2321, 0, 99, 0.5, 512))); tests.add(new Object[] {mb}); - mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 255, 0.5, 7))); + mb = DataConverter.convertToMatrixBlock(TestUtils.round(TestUtils.generateTestMatrix(1, 4000, 0, 100, 0.5, 7))); tests.add(new Object[] {mb}); // Paper diff --git a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java index 90c3bf6..c32ccbe 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java +++ b/src/test/java/org/apache/sysds/test/component/compress/colgroup/JolEstimateTest.java @@ -32,6 +32,7 @@ import org.apache.sysds.runtime.compress.colgroup.AColGroup; import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; import org.apache.sysds.runtime.compress.colgroup.ColGroupFactory; import org.apache.sysds.runtime.compress.estim.CompressedSizeEstimatorFactory; +import org.apache.sysds.runtime.compress.estim.CompressedSizeInfoColGroup; import org.apache.sysds.runtime.compress.lib.BitmapEncoder; import org.apache.sysds.runtime.compress.utils.ABitmap; import org.apache.sysds.runtime.matrix.data.MatrixBlock; @@ -101,32 +102,32 @@ public abstract class JolEstimateTest { @Test public void compressedSizeInfoEstimatorSample_90() { - compressedSizeInfoEstimatorSample(0.9, 0.99); + compressedSizeInfoEstimatorSample(0.9, 0.9); } @Test public void compressedSizeInfoEstimatorSample_50() { - compressedSizeInfoEstimatorSample(0.5, 0.95); + compressedSizeInfoEstimatorSample(0.5, 0.90); } @Test public void compressedSizeInfoEstimatorSample_20() { - compressedSizeInfoEstimatorSample(0.2, 0.90); + compressedSizeInfoEstimatorSample(0.2, 0.8); } @Test public void compressedSizeInfoEstimatorSample_10() { - compressedSizeInfoEstimatorSample(0.1, 0.9); + compressedSizeInfoEstimatorSample(0.1, 0.75); } @Test public void compressedSizeInfoEstimatorSample_5() { - compressedSizeInfoEstimatorSample(0.05, 0.9); + compressedSizeInfoEstimatorSample(0.05, 0.7); } @Test public void compressedSizeInfoEstimatorSample_1() { - compressedSizeInfoEstimatorSample(0.01, 0.9); + compressedSizeInfoEstimatorSample(0.01, 0.6); } public void compressedSizeInfoEstimatorSample(double ratio, double tolerance) { @@ -138,13 +139,16 @@ public abstract class JolEstimateTest { .setValidCompressions(EnumSet.of(getCT())).setSeed(seed).create(); cs.transposed = true; - final long estimateCSI = CompressedSizeEstimatorFactory.getSizeEstimator(mbt, cs) - .estimateCompressedColGroupSize().getCompressionSize(cg.getCompType()); + final CompressedSizeInfoColGroup cgsi = CompressedSizeEstimatorFactory.getSizeEstimator(mbt, cs) + .estimateCompressedColGroupSize(); + final long estimateCSI = cgsi.getCompressionSize(cg.getCompType()); final double minTolerance = actualSize * tolerance; final double maxTolerance = actualSize / tolerance; final String rangeString = minTolerance + " < " + estimateCSI + " < " + maxTolerance; boolean res = minTolerance < estimateCSI && estimateCSI < maxTolerance; - assertTrue("CSI Sampled estimate is not in tolerance range " + rangeString + "\n" + cg.toString(), res); + assertTrue( + "CSI Sampled estimate is not in tolerance range " + rangeString + "\n" + cgsi + "\n" + cg.toString(), + res); } catch(Exception e) {
