This is an automated email from the ASF dual-hosted git repository. baunsgaard pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/systemds.git
commit bc2602f1a24de39328424aaa90dce40509152031 Author: baunsgaard <[email protected]> AuthorDate: Tue May 4 20:54:23 2021 +0200 [SYSTEMDS-2999] CLA Decompression Unification This commit change the blocking of the decompression, to no longer align perfectly with 64k blocks, since if a column group contain many columns this is sub optimal. [SYSTEMDS-3000] CLA MM Most Common Element Addition This commit adds an exploitation of the compressed representation that allows add the most common element when multiplying on the left side with a compressed transposed matrix. This is a common occurrence in MMChain and TSMM and allows sparsity exploitation of dense compressed column groups. --- .../runtime/compress/CompressedMatrixBlock.java | 73 ++- .../compress/CompressionSettingsBuilder.java | 2 +- .../sysds/runtime/compress/cocode/CoCodeCost.java | 8 +- .../sysds/runtime/compress/colgroup/AColGroup.java | 3 +- .../compress/colgroup/ColGroupCompressed.java | 33 +- .../runtime/compress/colgroup/ColGroupConst.java | 7 +- .../runtime/compress/colgroup/ColGroupDDC.java | 138 ++--- .../runtime/compress/colgroup/ColGroupEmpty.java | 6 +- .../runtime/compress/colgroup/ColGroupFactory.java | 48 +- .../runtime/compress/colgroup/ColGroupOLE.java | 563 ++++++++++----------- .../runtime/compress/colgroup/ColGroupOffset.java | 14 - .../runtime/compress/colgroup/ColGroupRLE.java | 443 ++++++++-------- .../runtime/compress/colgroup/ColGroupSDC.java | 84 ++- .../compress/colgroup/ColGroupSDCSingle.java | 129 ++++- .../compress/colgroup/ColGroupSDCSingleZeros.java | 26 +- .../compress/colgroup/ColGroupSDCZeros.java | 11 +- .../compress/colgroup/ColGroupUncompressed.java | 98 ++-- .../runtime/compress/colgroup/ColGroupValue.java | 289 +++++++---- .../compress/colgroup/dictionary/ADictionary.java | 27 +- .../compress/colgroup/dictionary/Dictionary.java | 85 +++- .../compress/colgroup/dictionary/QDictionary.java | 39 +- .../sysds/runtime/compress/lib/CLALibCompAgg.java | 1 - .../runtime/compress/lib/CLALibLeftMultBy.java | 63 +-- .../runtime/compress/lib/CLALibRightMultBy.java | 82 +-- .../sysds/runtime/compress/lib/CLALibScalar.java | 3 +- .../sysds/runtime/matrix/data/MatrixValue.java | 4 +- .../component/compress/CompressedTestBase.java | 23 +- .../test/component/compress/TestConstants.java | 2 +- 28 files changed, 1355 insertions(+), 949 deletions(-) diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java index 473a1ec..bb7781e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressedMatrixBlock.java @@ -199,7 +199,11 @@ public class CompressedMatrixBlock extends MatrixBlock { Timing time = new Timing(true); // preallocation sparse rows to avoid repeated reallocations - MatrixBlock ret = new MatrixBlock(rlen, clen, false, -1); + MatrixBlock ret = getUncompressedColGroupAndRemoveFromListOfColGroups(); + if(ret != null && getColGroups().size() == 0) + return ret; + else if(ret == null) + ret = new MatrixBlock(rlen, clen, false, -1); ret.allocateDenseBlock(); decompress(ret); @@ -244,7 +248,11 @@ public class CompressedMatrixBlock extends MatrixBlock { return decompress(); Timing time = new Timing(true); - MatrixBlock ret = new MatrixBlock(rlen, clen, false, -1).allocateBlock(); + MatrixBlock ret = getUncompressedColGroupAndRemoveFromListOfColGroups(); + if(ret != null && getColGroups().size() == 0) + return ret; + else if(ret == null) + ret = new MatrixBlock(rlen, clen, false, -1); ret.allocateDenseBlock(); decompress(ret, k); @@ -258,19 +266,13 @@ public class CompressedMatrixBlock extends MatrixBlock { } public MatrixBlock decompress(MatrixBlock ret, int k) { - - if(nonZeros == -1) - ret.setNonZeros(this.recomputeNonZeros()); - else - ret.setNonZeros(nonZeros); try { ExecutorService pool = CommonThreadPool.get(k); int rlen = getNumRows(); final int blkz = CompressionSettings.BITMAP_BLOCK_SZ; - int blklen = (int) Math.ceil((double) rlen / k); - blklen += (blklen % blkz != 0) ? blkz - blklen % blkz : 0; + int blklen = (int) Math.max(64, Math.ceil((double) (blkz) / getNumColumns())); ArrayList<DecompressTask> tasks = new ArrayList<>(); - for(int i = 0; i < k & i * blklen < getNumRows(); i++) + for(int i = 0; i * blklen < getNumRows(); i++) tasks.add(new DecompressTask(_colGroups, ret, i * blklen, Math.min((i + 1) * blklen, rlen), overlappingColGroups)); List<Future<Long>> rtasks = pool.invokeAll(tasks); @@ -285,6 +287,34 @@ public class CompressedMatrixBlock extends MatrixBlock { ret.recomputeNonZeros(); ret.examSparsity(); } + else if(nonZeros == -1) + ret.setNonZeros(this.recomputeNonZeros()); + else + ret.setNonZeros(nonZeros); + return ret; + } + + private MatrixBlock getUncompressedColGroupAndRemoveFromListOfColGroups() { + // If we have a uncompressed column group that covers all of the matrix, + // it makes sense to use as the decompression target. + MatrixBlock ret = null; + // It is only relevant if we are in overlapping state, or we only have a Uncompressed ColumnGroup left. + if(isOverlapping() || _colGroups.size() == 1) { + for(int i = 0; i < _colGroups.size(); i++) { + AColGroup g = _colGroups.get(i); + if(g instanceof ColGroupUncompressed) { + // Find an Uncompressed ColumnGroup + ColGroupUncompressed guc = (ColGroupUncompressed) g; + MatrixBlock gMB = guc.getData(); + // Make sure that it is the correct dimensions + if(gMB.getNumColumns() == this.getNumColumns() && gMB.getNumRows() == this.getNumRows()) { + _colGroups.remove(i); + return gMB; + } + } + } + } + return ret; } @@ -480,21 +510,22 @@ public class CompressedMatrixBlock extends MatrixBlock { // compute matrix mult - boolean tryOverlapOutput = v.getNumColumns() > _colGroups.size() && w != null && w.getNumRows() > 1; - MatrixBlock tmp = CLALibRightMultBy.rightMultByMatrix(this, v, null, k, tryOverlapOutput); + // boolean tryOverlapOutput = v.getNumColumns() > _colGroups.size(); + MatrixBlock tmp = CLALibRightMultBy.rightMultByMatrix(this, v, null, k, true); - if(tmp instanceof CompressedMatrixBlock) { - CompressedMatrixBlock tmpC = (CompressedMatrixBlock) tmp; - if(ctype == ChainType.XtwXv) - tmpC = (CompressedMatrixBlock) CLALibBinaryCellOp.binaryOperations(bop, tmpC, w, null); - tmp = tmpC.decompress(k); + if(ctype == ChainType.XtwXv) { + if(tmp instanceof CompressedMatrixBlock) + tmp = CLALibBinaryCellOp.binaryOperations(bop, (CompressedMatrixBlock) tmp, w, null); + else + LibMatrixBincell.bincellOpInPlace(tmp, w, bop); } - else if(ctype == ChainType.XtwXv) - LibMatrixBincell.bincellOpInPlace(tmp, w, bop); - CLALibLeftMultBy.leftMultByMatrixTransposed(this, tmp, out, k); - out = LibMatrixReorg.transposeInPlace(out, k); + if(tmp instanceof CompressedMatrixBlock) + CLALibLeftMultBy.leftMultByMatrixTransposed(this, (CompressedMatrixBlock) tmp, out, k); + else + CLALibLeftMultBy.leftMultByMatrixTransposed(this, tmp, out, k); + out = LibMatrixReorg.transposeInPlace(out, k); out.recomputeNonZeros(); return out; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java index 388836f..a53bcfa 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java +++ b/src/main/java/org/apache/sysds/runtime/compress/CompressionSettingsBuilder.java @@ -39,7 +39,7 @@ public class CompressionSettingsBuilder { private boolean investigateEstimate = true; private boolean lossy = false; private EnumSet<CompressionType> validCompressions; - private boolean sortValuesByLength = false; + private boolean sortValuesByLength = true; private PartitionerType columnPartitioner; private int maxStaticColGroupCoCode = 10; diff --git a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java index 2cbc42f..1cce5b1 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java +++ b/src/main/java/org/apache/sysds/runtime/compress/cocode/CoCodeCost.java @@ -48,11 +48,15 @@ public class CoCodeCost extends AColumnCoCoder { */ private final int largestDistinct; - private final static int toSmallForAnalysis = 64; + private final int toSmallForAnalysis; + + private final double percentMaxCardinality = 0.08; protected CoCodeCost(CompressedSizeEstimator sizeEstimator, CompressionSettings cs, int numRows) { super(sizeEstimator, cs, numRows); - largestDistinct = Math.min(4096, Math.max(256, (int) (_numRows * 0.01))); + largestDistinct = Math.max(256, (int) (_numRows * percentMaxCardinality)); + toSmallForAnalysis = largestDistinct / 4; + LOG.error("CocodeCost largest Distinct: "+ largestDistinct + " toSmallForAnalysis: " + toSmallForAnalysis); } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java index 2f3bdff..8de274f 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/AColGroup.java @@ -60,6 +60,7 @@ public abstract class AColGroup implements Serializable { /** * Get the super type of the specific ColGroup Type used. + * * @param c The concrete ColGroupType * @return The super CompressionType. */ @@ -751,8 +752,8 @@ public abstract class AColGroup implements Serializable { @Override public String toString() { StringBuilder sb = new StringBuilder(); + sb.append(" ColGroupType: "); sb.append(this.getClass().getSimpleName()); - sb.append("\n"); sb.append(String.format("\n%15s%5d ", "Columns:", _colIndexes.length)); sb.append(Arrays.toString(_colIndexes)); diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java index a231283..c8f9a41 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupCompressed.java @@ -89,16 +89,6 @@ public abstract class ColGroupCompressed extends AColGroup { protected abstract boolean sameIndexStructure(ColGroupCompressed that); - public void leftMultByMatrix(MatrixBlock matrix, double[] result, int numCols, int rl, int ru) { - if(matrix.isEmpty()) - return; - else if(matrix.isInSparseFormat()) - leftMultBySparseMatrix(matrix.getSparseBlock(), result, matrix.getNumRows(), numCols, rl, ru); - else { - leftMultByMatrix(matrix.getDenseBlockValues(), result, matrix.getNumRows(), numCols, rl, ru); - } - } - /** * Multiply with a matrix on the left. * @@ -125,22 +115,33 @@ public abstract class ColGroupCompressed extends AColGroup { int ru); @Override - public double getMin() { + public final void leftMultByMatrix(MatrixBlock matrix, double[] result, int numCols, int rl, int ru) { + if(matrix.isEmpty()) + return; + else if(matrix.isInSparseFormat()) + leftMultBySparseMatrix(matrix.getSparseBlock(), result, matrix.getNumRows(), numCols, rl, ru); + else { + leftMultByMatrix(matrix.getDenseBlockValues(), result, matrix.getNumRows(), numCols, rl, ru); + } + } + + @Override + public final double getMin() { return computeMxx(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject(BuiltinCode.MIN)); } @Override - public double getMax() { + public final double getMax() { return computeMxx(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject(BuiltinCode.MAX)); } @Override - public void unaryAggregateOperations(AggregateUnaryOperator op, double[] c) { + public final void unaryAggregateOperations(AggregateUnaryOperator op, double[] c) { unaryAggregateOperations(op, c, 0, _numRows); } @Override - public void unaryAggregateOperations(AggregateUnaryOperator op, double[] c, int rl, int ru) { + public final void unaryAggregateOperations(AggregateUnaryOperator op, double[] c, int rl, int ru) { // sum and sumsq (reduceall/reducerow over tuples and counts) if(op.aggOp.increOp.fn instanceof Plus || op.aggOp.increOp.fn instanceof KahanPlus || op.aggOp.increOp.fn instanceof KahanPlusSq) { @@ -174,13 +175,13 @@ public abstract class ColGroupCompressed extends AColGroup { @Override public String toString() { StringBuilder sb = new StringBuilder(); + sb.append(" num Rows: " + getNumRows()); sb.append(super.toString()); - sb.append("num Rows: " + getNumRows()); return sb.toString(); } @Override - public int getNumRows() { + public final int getNumRows() { return _numRows; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java index 75ed773..eb38ee0 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupConst.java @@ -300,7 +300,7 @@ public class ColGroupConst extends ColGroupValue { } @Override - public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret) { + public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret, boolean preModified) { throw new DMLCompressionException("Does not make sense to call this"); } @@ -315,6 +315,11 @@ public class ColGroupConst extends ColGroupValue { } @Override + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified){ + throw new DMLCompressionException("Does not make sense to call this"); + } + + @Override protected int containsAllZeroTuple() { return -1; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java index e9e98d7..aa8a55f 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupDDC.java @@ -71,11 +71,11 @@ public class ColGroupDDC extends ColGroupValue { public void decompressToBlockUnSafe(MatrixBlock target, int rl, int ru, int offT, double[] values) { final int nCol = _colIndexes.length; final int tCol = target.getNumColumns(); - double[] c = target.getDenseBlockValues(); + final double[] c = target.getDenseBlockValues(); offT = offT * tCol; for(int i = rl; i < ru; i++, offT += tCol) { - int rowIndex = getIndex(i) * nCol; + final int rowIndex = _data.getIndex(i) * nCol; for(int j = 0; j < nCol; j++) c[offT + _colIndexes[j]] += values[rowIndex + j]; } @@ -87,7 +87,7 @@ public class ColGroupDDC extends ColGroupValue { int ncol = getNumCols(); double[] dictionary = getValues(); for(int i = 0; i < _numRows; i++) { - int rowIndex = getIndex(i) * ncol; + int rowIndex = _data.getIndex(i) * ncol; for(int colIx = 0; colIx < ncol; colIx++) { int origMatrixColIx = getColIndex(colIx); int col = colIndexTargets[origMatrixColIx]; @@ -105,7 +105,7 @@ public class ColGroupDDC extends ColGroupValue { double[] values = getValues(); int nnz = 0; for(int i = 0; i < _numRows; i++) { - int index = getIndex(i); + int index = _data.getIndex(i); if(index < getNumValues()) nnz += ((c[i] += values[(index) * ncol + colpos]) != 0) ? 1 : 0; else @@ -123,7 +123,7 @@ public class ColGroupDDC extends ColGroupValue { final int numValues = getNumValues(); int nnz = 0; for(int i = 0, r = rl; i < ru - rl; i++, r++) { - int index = getIndex(r); + int index = _data.getIndex(r); if(index < numValues) nnz += ((c[i] += values[(index) * ncol + colpos]) != 0) ? 1 : 0; else @@ -138,7 +138,7 @@ public class ColGroupDDC extends ColGroupValue { double[] values = getValues(); final int numValues = getNumValues(); for(int i = 0, r = rl; i < ru - rl; i++, r++) { - int index = getIndex(r); + int index = _data.getIndex(r); if(index < numValues) c[i] += values[(index) * ncol + colpos]; } @@ -152,7 +152,7 @@ public class ColGroupDDC extends ColGroupValue { throw new RuntimeException("Column index " + c + " not in DDC group."); // get value - int index = getIndex(r); + int index = _data.getIndex(r); if(index < getNumValues()) return _dict.getValue(index * _colIndexes.length + ix); else @@ -167,7 +167,7 @@ public class ColGroupDDC extends ColGroupValue { double[] values = _dict.getValues(); for(int i = rl; i < ru; i++) { int lnnz = 0; - int index = getIndex(i); + int index = _data.getIndex(i); if(index < numVals) { for(int colIx = index; colIx < ncol + index; colIx++) { lnnz += (values[colIx]) != 0 ? 1 : 0; @@ -181,7 +181,7 @@ public class ColGroupDDC extends ColGroupValue { protected void computeRowSums(double[] c, boolean square, int rl, int ru, boolean mean) { double[] vals = _dict.sumAllRowsToDouble(square, _colIndexes.length); for(int rix = rl; rix < ru; rix++) - c[rix] += vals[getIndex(rix)]; + c[rix] += vals[_data.getIndex(rix)]; } @Override @@ -189,7 +189,7 @@ public class ColGroupDDC extends ColGroupValue { final int nCol = getNumCols(); double[] preAggregatedRows = _dict.aggregateTuples(builtin, nCol); for(int i = rl; i < ru; i++) - c[i] = builtin.execute(c[i], preAggregatedRows[getIndex(i)]); + c[i] = builtin.execute(c[i], preAggregatedRows[_data.getIndex(i)]); } @Override @@ -200,7 +200,7 @@ public class ColGroupDDC extends ColGroupValue { @Override public int[] getCounts(int rl, int ru, int[] counts) { for(int i = rl; i < ru; i++) { - int index = getIndex(i); + int index = _data.getIndex(i); counts[index]++; } return counts; @@ -211,10 +211,10 @@ public class ColGroupDDC extends ColGroupValue { double[] vals = allocDVector(getNumValues(), true); if(row > 0) for(int i = 0, off = _numRows * row; i < _numRows; i++, off++) - vals[getIndex(i)] += a[off]; + vals[_data.getIndex(i)] += a[off]; else for(int i = 0; i < _numRows; i++) - vals[getIndex(i)] += a[i]; + vals[_data.getIndex(i)] += a[i]; return vals; } @@ -226,34 +226,12 @@ public class ColGroupDDC extends ColGroupValue { int[] indexes = sb.indexes(row); double[] sparseV = sb.values(row); for(int i = sb.pos(row); i < sb.size(row) + sb.pos(row); i++) - vals[getIndex(indexes[i])] += sparseV[i]; + vals[_data.getIndex(indexes[i])] += sparseV[i]; return vals; } /** - * Generic get index in dictionary for value at row position. - * - * @param r row position to get dictionary index for. - * @return The dictionary index - */ - protected int getIndex(int r) { - return _data.getIndex(r); - } - - /** - * Generic get index in dictionary for value at row, col position. If used consider changing to getIndex and - * precalculate offset to row - * - * @param r The row to find - * @param colIx the col index to find - * @return the index in the dictionary containing the specified value - */ - protected int getIndex(int r, int colIx) { - return _data.getIndex(r) * getNumCols() + colIx; - } - - /** * Generic get value for byte-length-agnostic access to first column. * * @param r Global row index @@ -261,7 +239,7 @@ public class ColGroupDDC extends ColGroupValue { * @return value */ protected double getData(int r, double[] values) { - int index = getIndex(r); + int index = _data.getIndex(r); return (index < values.length) ? values[index] : 0.0; } @@ -274,7 +252,7 @@ public class ColGroupDDC extends ColGroupValue { * @return value */ protected double getData(int r, int colIx, double[] values) { - int index = getIndex(r, colIx); + int index = _data.getIndex(r) * _colIndexes.length + colIx; return (index < values.length) ? values[index] : 0.0; } @@ -296,7 +274,7 @@ public class ColGroupDDC extends ColGroupValue { IPreAggregate ag = PreAggregateFactory.ag(retSize); // int[] m = _data.materializeMultiplied(nCol); for(int i = 0; i < this._numRows; i++) - ag.increment(lhs.getIndex(i) + this.getIndex(i) * nCol); + ag.increment(lhs._data.getIndex(i) + this._data.getIndex(i) * nCol); return ag; } @@ -315,9 +293,9 @@ public class ColGroupDDC extends ColGroupValue { int col; for(; i < this._numRows && lIt.hasNext(); i++) { - int row = this.getIndex(i); + int row = this._data.getIndex(i); if(lIt.value() == i) - col = lhs.getIndex(lIt.getDataIndexAndIncrement()); + col = lhs._data.getIndex(lIt.getDataIndexAndIncrement()); else col = offsetToDefault; @@ -325,7 +303,7 @@ public class ColGroupDDC extends ColGroupValue { } col = offsetToDefault; for(; i < this._numRows; i++) { - int row = this.getIndex(i); + int row = this._data.getIndex(i); ag.increment(col + row * nCol); } @@ -344,7 +322,7 @@ public class ColGroupDDC extends ColGroupValue { int col; for(; i < this._numRows && lIt.hasNext(); i++) { - int row = this.getIndex(i); + int row = this._data.getIndex(i); if(lIt.value() == i) { col = 1; lIt.next(); @@ -355,7 +333,7 @@ public class ColGroupDDC extends ColGroupValue { } for(; i < this._numRows; i++) - ag.increment(this.getIndex(i) * nCol); + ag.increment(this._data.getIndex(i) * nCol); return ag; } @@ -369,8 +347,8 @@ public class ColGroupDDC extends ColGroupValue { final AIterator lIt = lhs._indexes.getIterator(); while(lIt.hasNext()) { - int row = this.getIndex(lIt.value()); - int col = lhs.getIndex(lIt.getDataIndexAndIncrement()); + int row = this._data.getIndex(lIt.value()); + int col = lhs._data.getIndex(lIt.getDataIndexAndIncrement()); ag.increment(col + row * nCol); } @@ -387,7 +365,7 @@ public class ColGroupDDC extends ColGroupValue { final AIterator lIt = lhs._indexes.getIterator(); while(lIt.hasNext()) { - int row = this.getIndex(lIt.value()); + int row = this._data.getIndex(lIt.value()); lIt.next(); ag.increment(row); } @@ -409,7 +387,7 @@ public class ColGroupDDC extends ColGroupValue { for(int bixL = 0, offL = 0, sLenL = 0; bixL < bLenL; bixL += sLenL + 1, offL += blksz) { sLenL = lhs._data[bOffL + bixL]; for(int i = 1; i <= sLenL; i++) { - int idx = this.getIndex(offL + lhs._data[bOffL + bixL + i]); + int idx = this._data.getIndex(offL + lhs._data[bOffL + bixL + i]); ag.increment(kl + idx * NVL); } } @@ -433,7 +411,7 @@ public class ColGroupDDC extends ColGroupValue { lenL = lhs._data[boffL + bixL + 1]; final int endL = startL + lenL; for(int i = startL; i < endL; i++) { - int kr = getIndex(i) * NVL; + int kr = _data.getIndex(i) * NVL; ag.increment(kl + kr); } } @@ -445,27 +423,36 @@ public class ColGroupDDC extends ColGroupValue { public Dictionary preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret) { final int nCol = that._colIndexes.length; for(int r = 0; r < _numRows; r++) - that._dict.addToEntry(ret, that.getIndex(r), this.getIndex(r), nCol); + that._dict.addToEntry(ret, that._data.getIndex(r), this._data.getIndex(r), nCol); return ret; } @Override - public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret) { + public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret, boolean preModified) { final AIterator itThat = that._indexes.getIterator(); final int offsetToDefault = that.getNumValues() - 1; final int nCol = that._colIndexes.length; + if(preModified) { + while(itThat.hasNext()) { + final int to = _data.getIndex(itThat.value()); + final int fr = that._data.getIndex(itThat.getDataIndexAndIncrement()); + that._dict.addToEntry(ret, fr, to, nCol); + } + } + else { + int i = 0; - int i = 0; + for(; i < _numRows && itThat.hasNext(); i++) { + int fr = (itThat.value() == i) ? that._data + .getIndex(itThat.getDataIndexAndIncrement()) : offsetToDefault; + that._dict.addToEntry(ret, fr, this._data.getIndex(i), nCol); + } - for(; i < _numRows && itThat.hasNext(); i++) { - int fr = (itThat.value() == i) ? that.getIndex(itThat.getDataIndexAndIncrement()) : offsetToDefault; - that._dict.addToEntry(ret, fr, this.getIndex(i), nCol); + for(; i < _numRows; i++) + that._dict.addToEntry(ret, offsetToDefault, this._data.getIndex(i), nCol); } - for(; i < _numRows; i++) - that._dict.addToEntry(ret, offsetToDefault, this.getIndex(i), nCol); - return ret; } @@ -475,8 +462,8 @@ public class ColGroupDDC extends ColGroupValue { final int nCol = that._colIndexes.length; while(itThat.hasNext()) { - final int to = getIndex(itThat.value()); - final int fr = that.getIndex(itThat.getDataIndexAndIncrement()); + final int to = _data.getIndex(itThat.value()); + final int fr = that._data.getIndex(itThat.getDataIndexAndIncrement()); that._dict.addToEntry(ret, fr, to, nCol); } @@ -489,7 +476,7 @@ public class ColGroupDDC extends ColGroupValue { final int nCol = that._colIndexes.length; while(itThat.hasNext()) { - final int to = getIndex(itThat.value()); + final int to = _data.getIndex(itThat.value()); itThat.next(); that._dict.addToEntry(ret, 0, to, nCol); } @@ -498,6 +485,35 @@ public class ColGroupDDC extends ColGroupValue { } @Override + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified) { + final AIterator itThat = that._indexes.getIterator(); + final int nCol = that._colIndexes.length; + if(preModified) { + while(itThat.hasNext()) { + final int to = _data.getIndex(itThat.value()); + itThat.next(); + that._dict.addToEntry(ret, 0, to, nCol); + } + } + else { + int i = 0; + for(; i < _numRows && itThat.hasNext(); i++) { + if(itThat.value() == i) { + that._dict.addToEntry(ret, 0, this._data.getIndex(i), nCol); + itThat.next(); + } + else + that._dict.addToEntry(ret, 1, this._data.getIndex(i), nCol); + } + + for(; i < _numRows; i++) + that._dict.addToEntry(ret, 1, this._data.getIndex(i), nCol); + } + + return ret; + } + + @Override public boolean sameIndexStructure(ColGroupCompressed that) { return that instanceof ColGroupDDC && ((ColGroupDDC) that)._data == _data; } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java index 5192b26..2046d18 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupEmpty.java @@ -140,7 +140,7 @@ public class ColGroupEmpty extends ColGroupCompressed { if(val0 == 0) return this; return new ColGroupConst(_colIndexes, _numRows, - new Dictionary(new double[0]).applyScalarOp(op, val0, _colIndexes.length)); + new Dictionary(new double[_colIndexes.length]).applyScalarOp(op, val0, _colIndexes.length)); } @Override @@ -148,7 +148,7 @@ public class ColGroupEmpty extends ColGroupCompressed { if(sparseSafe) return this; return new ColGroupConst(_colIndexes, _numRows, - new Dictionary(new double[0]).applyBinaryRowOp(op.fn, v, sparseSafe, _colIndexes, left)); + new Dictionary(new double[_colIndexes.length]).applyBinaryRowOp(op.fn, v, sparseSafe, _colIndexes, left)); } @Override @@ -183,7 +183,7 @@ public class ColGroupEmpty extends ColGroupCompressed { @Override protected double computeMxx(double c, Builtin builtin) { - return 0; + return builtin.execute(c, 0); } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java index 49ef877..f16204e 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupFactory.java @@ -289,14 +289,16 @@ public class ColGroupFactory { if(LOG.isTraceEnabled()) LOG.trace("compressing to: " + compType); try { + if(cs.sortValuesByLength) + ubm.sortValuesByFrequency(); switch(compType) { case DDC: return compressDDC(colIndexes, rlen, ubm, cs); case RLE: - return new ColGroupRLE(colIndexes, rlen, ubm, cs); + return compressRLE(colIndexes, rlen, ubm, cs); case OLE: - return new ColGroupOLE(colIndexes, rlen, ubm, cs); + return compressOLE(colIndexes, rlen, ubm, cs); case SDC: return compressSDC(colIndexes, rlen, ubm, cs); case UNCOMPRESSED: @@ -460,4 +462,46 @@ public class ColGroupFactory { return new ColGroupDDC(colIndexes, rlen, dict, _data, null); } + + private static AColGroup compressOLE(int[] colIndexes, int rlen, ABitmap ubm, CompressionSettings cs) { + + ADictionary dict = ADictionary.getDictionary(ubm); + ColGroupOLE ole = new ColGroupOLE(rlen); + + final int numVals = ubm.getNumValues(); + char[][] lbitmaps = new char[numVals][]; + int totalLen = 0; + for(int i = 0; i < numVals; i++) { + lbitmaps[i] = ColGroupOLE.genOffsetBitmap(ubm.getOffsetsList(i).extractValues(), ubm.getNumOffsets(i)); + totalLen += lbitmaps[i].length; + } + + // compact bitmaps to linearized representation + ole.createCompressedBitmaps(numVals, totalLen, lbitmaps); + ole._dict = dict; + ole._zeros = ubm.getNumOffsets() < (long) rlen; + ole._colIndexes = colIndexes; + return ole; + } + + private static AColGroup compressRLE(int[] colIndexes, int rlen, ABitmap ubm, CompressionSettings cs) { + + ADictionary dict = ADictionary.getDictionary(ubm); + ColGroupRLE rle = new ColGroupRLE(rlen); + // compress the bitmaps + final int numVals = ubm.getNumValues(); + char[][] lbitmaps = new char[numVals][]; + int totalLen = 0; + + for(int k = 0; k < numVals; k++) { + lbitmaps[k] = ColGroupRLE.genRLEBitmap(ubm.getOffsetsList(k).extractValues(), ubm.getNumOffsets(k)); + totalLen += lbitmaps[k].length; + } + // compact bitmaps to linearized representation + rle.createCompressedBitmaps(numVals, totalLen, lbitmaps); + rle._dict = dict; + rle._zeros = ubm.getNumOffsets() < (long) rlen; + rle._colIndexes = colIndexes; + return rle; + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java index cc3d9f1..8101f40 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOLE.java @@ -29,7 +29,6 @@ import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.offset.AIterator; import org.apache.sysds.runtime.compress.colgroup.pre.IPreAggregate; import org.apache.sysds.runtime.compress.colgroup.pre.PreAggregateFactory; -import org.apache.sysds.runtime.compress.utils.ABitmap; import org.apache.sysds.runtime.compress.utils.LinearAlgebraUtils; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; @@ -44,7 +43,7 @@ import org.apache.sysds.runtime.matrix.operators.ScalarOperator; public class ColGroupOLE extends ColGroupOffset { private static final long serialVersionUID = -9157676271360528008L; - /** + /** * Constructor for serialization * * @param numRows Number of rows contained @@ -53,30 +52,6 @@ public class ColGroupOLE extends ColGroupOffset { super(numRows); } - /** - * Main constructor. Constructs and stores the necessary bitmaps. - * - * @param colIndices indices (within the block) of the columns included in this column - * @param numRows total number of rows in the parent block - * @param ubm Uncompressed bitmap representation of the block - * @param cs The Compression settings used for compression - */ - protected ColGroupOLE(int[] colIndices, int numRows, ABitmap ubm, CompressionSettings cs) { - super(colIndices, numRows, ubm, cs); - // compress the bitmaps - final int numVals = ubm.getNumValues(); - char[][] lbitmaps = new char[numVals][]; - int totalLen = 0; - for(int i = 0; i < numVals; i++) { - lbitmaps[i] = genOffsetBitmap(ubm.getOffsetsList(i).extractValues(), ubm.getNumOffsets(i)); - totalLen += lbitmaps[i].length; - } - - // compact bitmaps to linearized representation - createCompressedBitmaps(numVals, totalLen, lbitmaps); - - } - protected ColGroupOLE(int[] colIndices, int numRows, boolean zeros, ADictionary dict, char[] bitmaps, int[] bitmapOffs, int[] counts) { super(colIndices, numRows, zeros, dict, counts); @@ -425,304 +400,309 @@ public class ColGroupOLE extends ColGroupOffset { // @Override // public void rightMultByVector(double[] b, double[] c, int rl, int ru, double[] dictVals) { - // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - // final int numVals = getNumValues(); - - // if(rl % blksz != 0) - // throw new DMLCompressionException("All blocks should be starting at block segments for OLE"); - - // if(numVals > 1 && _numRows > blksz * 2) { - // // since single segment scans already exceed typical L2 cache sizes - // // and because there is some overhead associated with blocking, the - // // best configuration aligns with L3 cache size (x*vcores*64K*8B < L3) - // // x=4 leads to a good yet slightly conservative compromise for single-/ - // // multi-threaded and typical number of cores and L3 cache sizes - // final int blksz2 = CompressionSettings.BITMAP_BLOCK_SZ * 2; - // int[] apos = skipScan(numVals, rl); - // double[] aval = preaggValues(numVals, b, dictVals); - - // // step 2: cache conscious matrix-vector via horizontal scans - // for(int bi = rl; bi < ru; bi += blksz2) { - // int bimax = Math.min(bi + blksz2, ru); - - // // horizontal segment scan, incl pos maintenance - // for(int k = 0; k < numVals; k++) { - // int boff = _ptr[k]; - // int blen = len(k); - // double val = aval[k]; - // int bix = apos[k]; - - // for(int ii = bi; ii < bimax && bix < blen; ii += blksz) { - // // prepare length, start, and end pos - // int len = _data[boff + bix]; - // int pos = boff + bix + 1; - - // // compute partial results - // LinearAlgebraUtils.vectAdd(val, c, _data, pos, ii, len); - // bix += len + 1; - // } - - // apos[k] = bix; - // } - // } - // } - // else { - // // iterate over all values and their bitmaps - // for(int k = 0; k < numVals; k++) { - // // prepare value-to-add for entire value bitmap - // int boff = _ptr[k]; - // int blen = len(k); - // double val = sumValues(k, b, dictVals); - - // // iterate over bitmap blocks and add values - // if(val != 0) { - // int bix = 0; - // int off = 0; - // int slen = -1; - - // // scan to beginning offset if necessary - // if(rl > 0) { - // for(; bix < blen & off < rl; bix += slen + 1, off += blksz) { - // slen = _data[boff + bix]; - // } - // } - - // // compute partial results - // for(; bix < blen & off < ru; bix += slen + 1, off += blksz) { - // slen = _data[boff + bix]; - // for(int blckIx = 1; blckIx <= slen; blckIx++) { - // c[off + _data[boff + bix + blckIx]] += val; - // } - // } - // } - // } - // } + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // final int numVals = getNumValues(); + + // if(rl % blksz != 0) + // throw new DMLCompressionException("All blocks should be starting at block segments for OLE"); + + // if(numVals > 1 && _numRows > blksz * 2) { + // // since single segment scans already exceed typical L2 cache sizes + // // and because there is some overhead associated with blocking, the + // // best configuration aligns with L3 cache size (x*vcores*64K*8B < L3) + // // x=4 leads to a good yet slightly conservative compromise for single-/ + // // multi-threaded and typical number of cores and L3 cache sizes + // final int blksz2 = CompressionSettings.BITMAP_BLOCK_SZ * 2; + // int[] apos = skipScan(numVals, rl); + // double[] aval = preaggValues(numVals, b, dictVals); + + // // step 2: cache conscious matrix-vector via horizontal scans + // for(int bi = rl; bi < ru; bi += blksz2) { + // int bimax = Math.min(bi + blksz2, ru); + + // // horizontal segment scan, incl pos maintenance + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // double val = aval[k]; + // int bix = apos[k]; + + // for(int ii = bi; ii < bimax && bix < blen; ii += blksz) { + // // prepare length, start, and end pos + // int len = _data[boff + bix]; + // int pos = boff + bix + 1; + + // // compute partial results + // LinearAlgebraUtils.vectAdd(val, c, _data, pos, ii, len); + // bix += len + 1; + // } + + // apos[k] = bix; + // } + // } + // } + // else { + // // iterate over all values and their bitmaps + // for(int k = 0; k < numVals; k++) { + // // prepare value-to-add for entire value bitmap + // int boff = _ptr[k]; + // int blen = len(k); + // double val = sumValues(k, b, dictVals); + + // // iterate over bitmap blocks and add values + // if(val != 0) { + // int bix = 0; + // int off = 0; + // int slen = -1; + + // // scan to beginning offset if necessary + // if(rl > 0) { + // for(; bix < blen & off < rl; bix += slen + 1, off += blksz) { + // slen = _data[boff + bix]; + // } + // } + + // // compute partial results + // for(; bix < blen & off < ru; bix += slen + 1, off += blksz) { + // slen = _data[boff + bix]; + // for(int blckIx = 1; blckIx <= slen; blckIx++) { + // c[off + _data[boff + bix + blckIx]] += val; + // } + // } + // } + // } + // } // } // @Override - // public void rightMultByMatrix(int[] outputColumns, double[] preAggregatedB, double[] c, int thatNrColumns, int rl, - // int ru) { - - // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - // final int numVals = getNumValues(); - - // if(numVals > 1 && _numRows > blksz * 2) { - // final int blksz2 = blksz * 2; - // int[] apos = skipScan(numVals, rl); - // int blockStart = rl - rl % blksz; - // for(int bi = blockStart; bi < ru; bi += blksz2) { - // int bimax = Math.min(bi + blksz2, ru); - // for(int k = 0; k < numVals; k++) { - // int boff = _ptr[k]; - // int blen = len(k); - // int bix = apos[k]; - // for(int ii = bi; ii < bimax && bix < blen; ii += blksz) { - // int len = _data[boff + bix]; - // int pos = _data[boff + bix + 1]; - // if(pos >= rl) - // addV(c, preAggregatedB, outputColumns, (bi + pos) * thatNrColumns, k); - // bix += len + 1; - // } - // apos[k] = bix; - // } - // } - // } - // else { - // for(int k = 0; k < numVals; k++) { - // int boff = _ptr[k]; - // int blen = len(k); - // int bix = skipScanVal(k, rl); - // int off = rl; - // int slen = 0; - // // compute partial results - // for(; bix < blen & off < ru; bix += slen + 1, off += blksz) { - // slen = _data[boff + bix]; - // for(int blckIx = 1; blckIx <= slen; blckIx++) { - // int rowIdx = (_data[boff + bix + blckIx] + off) * thatNrColumns; - // addV(c, preAggregatedB, outputColumns, rowIdx, k); - // } - // } - // } - // } + // public void rightMultByMatrix(int[] outputColumns, double[] preAggregatedB, double[] c, int thatNrColumns, int + // rl, + // int ru) { + + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // final int numVals = getNumValues(); + + // if(numVals > 1 && _numRows > blksz * 2) { + // final int blksz2 = blksz * 2; + // int[] apos = skipScan(numVals, rl); + // int blockStart = rl - rl % blksz; + // for(int bi = blockStart; bi < ru; bi += blksz2) { + // int bimax = Math.min(bi + blksz2, ru); + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // int bix = apos[k]; + // for(int ii = bi; ii < bimax && bix < blen; ii += blksz) { + // int len = _data[boff + bix]; + // int pos = _data[boff + bix + 1]; + // if(pos >= rl) + // addV(c, preAggregatedB, outputColumns, (bi + pos) * thatNrColumns, k); + // bix += len + 1; + // } + // apos[k] = bix; + // } + // } + // } + // else { + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // int bix = skipScanVal(k, rl); + // int off = rl; + // int slen = 0; + // // compute partial results + // for(; bix < blen & off < ru; bix += slen + 1, off += blksz) { + // slen = _data[boff + bix]; + // for(int blckIx = 1; blckIx <= slen; blckIx++) { + // int rowIdx = (_data[boff + bix + blckIx] + off) * thatNrColumns; + // addV(c, preAggregatedB, outputColumns, rowIdx, k); + // } + // } + // } + // } // } // private static void addV(double[] c, double[] preAggregatedB, int[] outputColumns, int rowIdx, int k) { - // int n = k * outputColumns.length; - // for(int i = 0; i < outputColumns.length; i++) { - // c[rowIdx + outputColumns[i]] += preAggregatedB[n + i]; - // } + // int n = k * outputColumns.length; + // for(int i = 0; i < outputColumns.length; i++) { + // c[rowIdx + outputColumns[i]] += preAggregatedB[n + i]; + // } // } // @Override // public void leftMultByRowVector(double[] a, double[] c, int numVals, double[] values) { - // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - // if(numVals >= 1 && _numRows > blksz) - // leftMultByRowVectorBlocking(a, c, numVals, values); - // else - // leftMultByRowVectorNonBlocking(a, c, numVals, values); + // if(numVals >= 1 && _numRows > blksz) + // leftMultByRowVectorBlocking(a, c, numVals, values); + // else + // leftMultByRowVectorNonBlocking(a, c, numVals, values); // } // private void leftMultByRowVectorBlocking(double[] a, double[] c, int numVals, double[] values) { - // double[] cvals = preAggregate(a); - // postScaling(values, cvals, c, numVals); + // double[] cvals = preAggregate(a); + // postScaling(values, cvals, c, numVals); // } // private void leftMultByRowVectorNonBlocking(double[] a, double[] c, int numVals, double[] values) { - // // iterate over all values and their bitmaps - // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - // final int numCols = getNumCols(); - // for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) { - // int boff = _ptr[k]; - // int blen = len(k); - - // // iterate over bitmap blocks and add partial results - // double vsum = 0; - // for(int bix = 0, off = 0; bix < blen; bix += _data[boff + bix] + 1, off += blksz) - // vsum += LinearAlgebraUtils.vectSum(a, _data, off, boff + bix + 1, _data[boff + bix]); - - // // scale partial results by values and write results - // for(int j = 0; j < numCols; j++) - // c[_colIndexes[j]] += vsum * values[valOff + j]; - // } + // // iterate over all values and their bitmaps + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // final int numCols = getNumCols(); + // for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) { + // int boff = _ptr[k]; + // int blen = len(k); + + // // iterate over bitmap blocks and add partial results + // double vsum = 0; + // for(int bix = 0, off = 0; bix < blen; bix += _data[boff + bix] + 1, off += blksz) + // vsum += LinearAlgebraUtils.vectSum(a, _data, off, boff + bix + 1, _data[boff + bix]); + + // // scale partial results by values and write results + // for(int j = 0; j < numCols; j++) + // c[_colIndexes[j]] += vsum * values[valOff + j]; + // } // } // @Override // public void leftMultByMatrix(double[] a, double[] c, double[] values, int numRows, int numCols, int rl, int ru, - // int vOff) { - // final int numVals = getNumValues(); - // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - // if(numVals >= 1 && _numRows > blksz) - // leftMultByMatrixBlocking(a, c, values, numRows, numCols, rl, ru, vOff, numVals); - // else - // leftMultByMatrixNonBlocking(a, c, values, numRows, numCols, rl, ru, vOff, numVals); + // int vOff) { + // final int numVals = getNumValues(); + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // if(numVals >= 1 && _numRows > blksz) + // leftMultByMatrixBlocking(a, c, values, numRows, numCols, rl, ru, vOff, numVals); + // else + // leftMultByMatrixNonBlocking(a, c, values, numRows, numCols, rl, ru, vOff, numVals); // } // private void leftMultByMatrixBlocking(double[] a, double[] c, double[] values, int numRows, int numCols, int rl, - // int ru, int vOff, int numVals) { - // for(int i = rl; i < ru; i++) { - // double[] cvals = preAggregate(a, i); - // postScaling(values, cvals, c, numVals, i, numCols); - // } + // int ru, int vOff, int numVals) { + // for(int i = rl; i < ru; i++) { + // double[] cvals = preAggregate(a, i); + // postScaling(values, cvals, c, numVals, i, numCols); + // } // } - // private void leftMultByMatrixNonBlocking(double[] a, double[] c, double[] values, int numRows, int numCols, int rl, - // int ru, int vOff, int numVals) { - // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - // for(int i = rl, offR = vOff * _numRows; i < ru; i++, offR += _numRows) { - // for(int k = 0, valOff = 0; k < numVals; k++, valOff += _colIndexes.length) { - // int boff = _ptr[k]; - // int blen = len(k); - - // // iterate over bitmap blocks and add partial results - // double vsum = 0; - // for(int bix = 0, off = 0; bix < blen; bix += _data[boff + bix] + 1, off += blksz) - // vsum += LinearAlgebraUtils.vectSum(a, _data, off + offR, boff + bix + 1, _data[boff + bix]); - - // // scale partial results by values and write results - - // int offC = i * numCols; - // for(int j = 0; j < _colIndexes.length; j++) { - // int colIx = _colIndexes[j] + offC; - // c[colIx] += vsum * values[valOff + j]; - // } - // } - // } + // private void leftMultByMatrixNonBlocking(double[] a, double[] c, double[] values, int numRows, int numCols, int + // rl, + // int ru, int vOff, int numVals) { + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // for(int i = rl, offR = vOff * _numRows; i < ru; i++, offR += _numRows) { + // for(int k = 0, valOff = 0; k < numVals; k++, valOff += _colIndexes.length) { + // int boff = _ptr[k]; + // int blen = len(k); + + // // iterate over bitmap blocks and add partial results + // double vsum = 0; + // for(int bix = 0, off = 0; bix < blen; bix += _data[boff + bix] + 1, off += blksz) + // vsum += LinearAlgebraUtils.vectSum(a, _data, off + offR, boff + bix + 1, _data[boff + bix]); + + // // scale partial results by values and write results + + // int offC = i * numCols; + // for(int j = 0; j < _colIndexes.length; j++) { + // int colIx = _colIndexes[j] + offC; + // c[colIx] += vsum * values[valOff + j]; + // } + // } + // } // } // @Override - // public void leftMultBySparseMatrix(SparseBlock sb, double[] c, double[] values, int numRows, int numCols, int row) { - // // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - // // final int numVals = getNumValues(); - // throw new NotImplementedException("Not implemented Sparse multiplication OLE"); - // // if(numVals > 1 && _numRows > blksz) - // // leftMultBySparseMatrixBlocking(sb, c, values, numRows, numCols, row, tmpA, numVals); - // // else - // // leftMultBySparseMatrixNonBlock(sb, c, values, numRows, numCols, row, tmpA, numVals); + // public void leftMultBySparseMatrix(SparseBlock sb, double[] c, double[] values, int numRows, int numCols, int + // row) { + // // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // // final int numVals = getNumValues(); + // throw new NotImplementedException("Not implemented Sparse multiplication OLE"); + // // if(numVals > 1 && _numRows > blksz) + // // leftMultBySparseMatrixBlocking(sb, c, values, numRows, numCols, row, tmpA, numVals); + // // else + // // leftMultBySparseMatrixNonBlock(sb, c, values, numRows, numCols, row, tmpA, numVals); + + // } + + // private void leftMultBySparseMatrixBlocking(SparseBlock sb, double[] c, double[] values, int numRows, int + // numCols, + // int row, double[] tmpA, int numVals) { + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // int sparseEndIndex = sb.size(row) + sb.pos(row); + // int[] indexes = sb.indexes(row); + // double[] sparseV = sb.values(row); + + // // cache blocking config (see matrix-vector mult for explanation) + // final int blksz2 = 2 * CompressionSettings.BITMAP_BLOCK_SZ; + + // // step 1: prepare position and value arrays + // int[] apos = allocIVector(numVals, true); + // double[] cvals = allocDVector(numVals, true); + // // step 2: cache conscious matrix-vector via horizontal scans + // int pI = sb.pos(row); + // for(int ai = 0; ai < _numRows; ai += blksz2) { + // int aimax = Math.min(ai + blksz2, _numRows); + // Arrays.fill(tmpA, 0); + // for(; pI < sparseEndIndex && indexes[pI] < aimax; pI++) { + // if(indexes[pI] >= ai) + // tmpA[indexes[pI] - ai] = sparseV[pI]; + // } + // // horizontal segment scan, incl pos maintenance + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // int bix = apos[k]; + // double vsum = 0; + // for(int ii = ai; ii < aimax && bix < blen; ii += blksz) { + // int len = _data[boff + bix]; + // int pos = boff + bix + 1; + // int blockId = (ii / blksz) % 2; + // vsum += LinearAlgebraUtils.vectSum(tmpA, _data, blockId * blksz, pos, len); + // bix += len + 1; // } - // private void leftMultBySparseMatrixBlocking(SparseBlock sb, double[] c, double[] values, int numRows, int numCols, - // int row, double[] tmpA, int numVals) { - // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - // int sparseEndIndex = sb.size(row) + sb.pos(row); - // int[] indexes = sb.indexes(row); - // double[] sparseV = sb.values(row); - - // // cache blocking config (see matrix-vector mult for explanation) - // final int blksz2 = 2 * CompressionSettings.BITMAP_BLOCK_SZ; - - // // step 1: prepare position and value arrays - // int[] apos = allocIVector(numVals, true); - // double[] cvals = allocDVector(numVals, true); - // // step 2: cache conscious matrix-vector via horizontal scans - // int pI = sb.pos(row); - // for(int ai = 0; ai < _numRows; ai += blksz2) { - // int aimax = Math.min(ai + blksz2, _numRows); - // Arrays.fill(tmpA, 0); - // for(; pI < sparseEndIndex && indexes[pI] < aimax; pI++) { - // if(indexes[pI] >= ai) - // tmpA[indexes[pI] - ai] = sparseV[pI]; - // } - - // // horizontal segment scan, incl pos maintenance - // for(int k = 0; k < numVals; k++) { - // int boff = _ptr[k]; - // int blen = len(k); - // int bix = apos[k]; - // double vsum = 0; - // for(int ii = ai; ii < aimax && bix < blen; ii += blksz) { - // int len = _data[boff + bix]; - // int pos = boff + bix + 1; - // int blockId = (ii / blksz) % 2; - // vsum += LinearAlgebraUtils.vectSum(tmpA, _data, blockId * blksz, pos, len); - // bix += len + 1; - // } - - // apos[k] = bix; - // cvals[k] += vsum; - // } - // } - - // int offC = row * numCols; - // // step 3: scale partial results by values and write to global output - // for(int k = 0, valOff = 0; k < numVals; k++, valOff += _colIndexes.length) - // for(int j = 0; j < _colIndexes.length; j++) { - // int colIx = _colIndexes[j] + offC; - // c[colIx] += cvals[k] * values[valOff + j]; - // } + // apos[k] = bix; + // cvals[k] += vsum; + // } + // } + // int offC = row * numCols; + // // step 3: scale partial results by values and write to global output + // for(int k = 0, valOff = 0; k < numVals; k++, valOff += _colIndexes.length) + // for(int j = 0; j < _colIndexes.length; j++) { + // int colIx = _colIndexes[j] + offC; + // c[colIx] += cvals[k] * values[valOff + j]; // } - // private void leftMultBySparseMatrixNonBlock(SparseBlock sb, double[] c, double[] values, int numRows, int numCols, - // int row, double[] tmpA, int numVals) { - // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; - // int sparseEndIndex = sb.size(row) + sb.pos(row); - // int[] indexes = sb.indexes(row); - // double[] sparseV = sb.values(row); - - // for(int k = 0, valOff = 0; k < numVals; k++, valOff += _colIndexes.length) { - // int boff = _ptr[k]; - // int blen = len(k); - // double vsum = 0; - // int pI = sb.pos(row); - // for(int bix = 0, off = 0; bix < blen; bix += _data[boff + bix] + 1, off += blksz) { - // // blockId = off / blksz; - // Arrays.fill(tmpA, 0); - // for(; pI < sparseEndIndex && indexes[pI] < off + blksz; pI++) { - // if(indexes[pI] >= off) - // tmpA[indexes[pI] - off] = sparseV[pI]; - // } - // vsum += LinearAlgebraUtils.vectSum(tmpA, _data, 0, boff + bix + 1, _data[boff + bix]); - // } - - // for(int j = 0; j < _colIndexes.length; j++) { - // int Voff = _colIndexes[j] + row * numCols; - // c[Voff] += vsum * values[valOff + j]; - // } - // } + // } + + // private void leftMultBySparseMatrixNonBlock(SparseBlock sb, double[] c, double[] values, int numRows, int + // numCols, + // int row, double[] tmpA, int numVals) { + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ; + // int sparseEndIndex = sb.size(row) + sb.pos(row); + // int[] indexes = sb.indexes(row); + // double[] sparseV = sb.values(row); + + // for(int k = 0, valOff = 0; k < numVals; k++, valOff += _colIndexes.length) { + // int boff = _ptr[k]; + // int blen = len(k); + // double vsum = 0; + // int pI = sb.pos(row); + // for(int bix = 0, off = 0; bix < blen; bix += _data[boff + bix] + 1, off += blksz) { + // // blockId = off / blksz; + // Arrays.fill(tmpA, 0); + // for(; pI < sparseEndIndex && indexes[pI] < off + blksz; pI++) { + // if(indexes[pI] >= off) + // tmpA[indexes[pI] - off] = sparseV[pI]; + // } + // vsum += LinearAlgebraUtils.vectSum(tmpA, _data, 0, boff + bix + 1, _data[boff + bix]); + // } + + // for(int j = 0; j < _colIndexes.length; j++) { + // int Voff = _colIndexes[j] + row * numCols; + // c[Voff] += vsum * values[valOff + j]; + // } + // } // } @Override @@ -1090,7 +1070,7 @@ public class ColGroupOLE extends ColGroupOffset { for(int bixR = 0, offR = 0, sLenR = 0; bixR < bLenR; bixR += sLenR + 1, offR += blksz) { sLenR = this._data[bOffR + bixR]; for(int j = 1; j <= sLenR; j++) { - int idx = lhs.getIndex(offR + this._data[bOffR + bixR + j]); + int idx = lhs._data.getIndex(offR + this._data[bOffR + bixR + j]); ag.increment(idx + krOff); } } @@ -1214,7 +1194,7 @@ public class ColGroupOLE extends ColGroupOffset { } @Override - public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret) { + public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret, boolean preModified) { throw new NotImplementedException(); } @@ -1227,4 +1207,9 @@ public class ColGroupOLE extends ColGroupOffset { public Dictionary preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { throw new NotImplementedException(); } + + @Override + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified){ + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOffset.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOffset.java index 176258a..d33e816 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOffset.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupOffset.java @@ -24,9 +24,7 @@ import java.io.DataOutput; import java.io.IOException; import java.util.Arrays; -import org.apache.sysds.runtime.compress.CompressionSettings; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; -import org.apache.sysds.runtime.compress.utils.ABitmap; import org.apache.sysds.runtime.compress.utils.LinearAlgebraUtils; import org.apache.sysds.runtime.functionobjects.Builtin; @@ -53,18 +51,6 @@ public abstract class ColGroupOffset extends ColGroupValue { super(numRows); } - /** - * Main constructor. Stores the headers for the individual bitmaps. - * - * @param colIndices indices (within the block) of the columns included in this column - * @param numRows total number of rows in the parent block - * @param ubm Uncompressed bitmap representation of the block - * @param cs The Compression settings used for compression - */ - protected ColGroupOffset(int[] colIndices, int numRows, ABitmap ubm, CompressionSettings cs) { - super(colIndices, numRows, ubm, cs); - } - protected ColGroupOffset(int[] colIndices, int numRows, boolean zeros, ADictionary dict, int[] cachedCounts) { super(colIndices, numRows, dict, cachedCounts); _zeros = zeros; diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java index d27913b..1e79abc 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupRLE.java @@ -29,7 +29,6 @@ import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.pre.IPreAggregate; import org.apache.sysds.runtime.compress.colgroup.pre.PreAggregateFactory; -import org.apache.sysds.runtime.compress.utils.ABitmap; import org.apache.sysds.runtime.compress.utils.LinearAlgebraUtils; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; @@ -51,31 +50,6 @@ public class ColGroupRLE extends ColGroupOffset { super(numRows); } - /** - * Main constructor. Constructs and stores the necessary bitmaps. - * - * @param colIndices indices (within the block) of the columns included in this column - * @param numRows total number of rows in the parent block - * @param ubm Uncompressed bitmap representation of the block - * @param cs The Compression settings used for compression - */ - protected ColGroupRLE(int[] colIndices, int numRows, ABitmap ubm, CompressionSettings cs) { - super(colIndices, numRows, ubm, cs); - - // compress the bitmaps - final int numVals = ubm.getNumValues(); - char[][] lbitmaps = new char[numVals][]; - int totalLen = 0; - - for(int k = 0; k < numVals; k++) { - lbitmaps[k] = genRLEBitmap(ubm.getOffsetsList(k).extractValues(), ubm.getNumOffsets(k)); - totalLen += lbitmaps[k].length; - } - // compact bitmaps to linearized representation - createCompressedBitmaps(numVals, totalLen, lbitmaps); - - } - protected ColGroupRLE(int[] colIndices, int numRows, boolean zeros, ADictionary dict, char[] bitmaps, int[] bitmapOffs, int[] cachedCounts) { super(colIndices, numRows, zeros, dict, cachedCounts); @@ -393,223 +367,225 @@ public class ColGroupRLE extends ColGroupOffset { // @Override // public void rightMultByVector(double[] b, double[] c, int rl, int ru, double[] dictVals) { - // final int numVals = getNumValues(); - // if(numVals >= 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) { - // // L3 cache alignment, see comment rightMultByVector OLE column group - // // core difference of RLE to OLE is that runs are not segment alignment, - // // which requires care of handling runs crossing cache-buckets - // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ * 2; - - // // step 1: prepare position and value arrays - - // // current pos / values per RLE list - - // // step 2: cache conscious matrix-vector via horizontal scans - // for(int bi = rl; bi < ru; bi += blksz) { - // int[] astart = new int[numVals]; - // int[] apos = skipScan(numVals, rl, astart); - // double[] aval = preaggValues(numVals, b, dictVals); - // int bimax = Math.min(bi + blksz, ru); - - // // horizontal segment scan, incl pos maintenance - // for(int k = 0; k < numVals; k++) { - // int boff = _ptr[k]; - // int blen = len(k); - // double val = aval[k]; - // int bix = apos[k]; - // int start = astart[k]; - - // // compute partial results, not aligned - // while(bix < blen & bix < bimax) { - // int lstart = _data[boff + bix]; - // int llen = _data[boff + bix + 1]; - // int len = Math.min(start + lstart + llen, bimax) - Math.max(bi, start + lstart); - // if(len > 0) { - // LinearAlgebraUtils.vectAdd(val, c, Math.max(bi, start + lstart), len); - // } - // start += lstart + llen; - // bix += 2; - // } - - // apos[k] = bix; - // astart[k] = start; - // } - // } - // } - // else { - // for(int k = 0; k < numVals; k++) { - // int boff = _ptr[k]; - // int blen = len(k); - // double val = sumValues(k, b, dictVals); - // int bix = 0; - // int start = 0; - - // // scan to beginning offset if necessary - // if(rl > 0) { // rl aligned with blksz - // while(bix < blen) { - // int lstart = _data[boff + bix]; // start - // int llen = _data[boff + bix + 1]; // len - // if(start + lstart + llen >= rl) - // break; - // start += lstart + llen; - // bix += 2; - // } - // } - - // // compute partial results, not aligned - // while(bix < blen) { - // int lstart = _data[boff + bix]; - // int llen = _data[boff + bix + 1]; - // LinearAlgebraUtils.vectAdd(val, c, Math.max(rl, start + lstart), - // Math.min(start + lstart + llen, ru) - Math.max(rl, start + lstart)); - // if(start + lstart + llen >= ru) - // break; - // start += lstart + llen; - // bix += 2; - // } - // } - // } + // final int numVals = getNumValues(); + // if(numVals >= 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) { + // // L3 cache alignment, see comment rightMultByVector OLE column group + // // core difference of RLE to OLE is that runs are not segment alignment, + // // which requires care of handling runs crossing cache-buckets + // final int blksz = CompressionSettings.BITMAP_BLOCK_SZ * 2; + + // // step 1: prepare position and value arrays + + // // current pos / values per RLE list + + // // step 2: cache conscious matrix-vector via horizontal scans + // for(int bi = rl; bi < ru; bi += blksz) { + // int[] astart = new int[numVals]; + // int[] apos = skipScan(numVals, rl, astart); + // double[] aval = preaggValues(numVals, b, dictVals); + // int bimax = Math.min(bi + blksz, ru); + + // // horizontal segment scan, incl pos maintenance + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // double val = aval[k]; + // int bix = apos[k]; + // int start = astart[k]; + + // // compute partial results, not aligned + // while(bix < blen & bix < bimax) { + // int lstart = _data[boff + bix]; + // int llen = _data[boff + bix + 1]; + // int len = Math.min(start + lstart + llen, bimax) - Math.max(bi, start + lstart); + // if(len > 0) { + // LinearAlgebraUtils.vectAdd(val, c, Math.max(bi, start + lstart), len); + // } + // start += lstart + llen; + // bix += 2; + // } + + // apos[k] = bix; + // astart[k] = start; + // } + // } + // } + // else { + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // double val = sumValues(k, b, dictVals); + // int bix = 0; + // int start = 0; + + // // scan to beginning offset if necessary + // if(rl > 0) { // rl aligned with blksz + // while(bix < blen) { + // int lstart = _data[boff + bix]; // start + // int llen = _data[boff + bix + 1]; // len + // if(start + lstart + llen >= rl) + // break; + // start += lstart + llen; + // bix += 2; + // } + // } + + // // compute partial results, not aligned + // while(bix < blen) { + // int lstart = _data[boff + bix]; + // int llen = _data[boff + bix + 1]; + // LinearAlgebraUtils.vectAdd(val, c, Math.max(rl, start + lstart), + // Math.min(start + lstart + llen, ru) - Math.max(rl, start + lstart)); + // if(start + lstart + llen >= ru) + // break; + // start += lstart + llen; + // bix += 2; + // } + // } + // } // } // @Override - // public void rightMultByMatrix(int[] outputColumns, double[] preAggregatedB, double[] c, int thatNrColumns, int rl, - // int ru) { - // final int nrVals = getNumValues(); - // for(int k = 0; k < nrVals; k++) { - // int boff = _ptr[k]; - // int blen = len(k); - // int bix = 0; - // int start = 0; - - // // scan to beginning offset if necessary - // if(rl > 0) { // rl aligned with blksz - // while(bix < blen) { - // int lstart = _data[boff + bix]; // start - // int llen = _data[boff + bix + 1]; // len - // if(start + lstart + llen >= rl) - // break; - // start += lstart + llen; - // bix += 2; - // } - // } - // // compute partial results, not aligned - // while(bix < blen) { - // int lstart = _data[boff + bix]; - // int llen = _data[boff + bix + 1]; - // LinearAlgebraUtils.vectListAdd(preAggregatedB, c, Math.max(rl, start + lstart), - // Math.min(start + lstart + llen, ru), outputColumns, thatNrColumns, k); - // if(start + lstart + llen >= ru) - // break; - // start += lstart + llen; - // bix += 2; - // } - // } + // public void rightMultByMatrix(int[] outputColumns, double[] preAggregatedB, double[] c, int thatNrColumns, int + // rl, + // int ru) { + // final int nrVals = getNumValues(); + // for(int k = 0; k < nrVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + // int bix = 0; + // int start = 0; + + // // scan to beginning offset if necessary + // if(rl > 0) { // rl aligned with blksz + // while(bix < blen) { + // int lstart = _data[boff + bix]; // start + // int llen = _data[boff + bix + 1]; // len + // if(start + lstart + llen >= rl) + // break; + // start += lstart + llen; + // bix += 2; + // } + // } + // // compute partial results, not aligned + // while(bix < blen) { + // int lstart = _data[boff + bix]; + // int llen = _data[boff + bix + 1]; + // LinearAlgebraUtils.vectListAdd(preAggregatedB, c, Math.max(rl, start + lstart), + // Math.min(start + lstart + llen, ru), outputColumns, thatNrColumns, k); + // if(start + lstart + llen >= ru) + // break; + // start += lstart + llen; + // bix += 2; + // } + // } // } // @Override // public void leftMultByRowVector(double[] a, double[] c, int numVals, double[] values) { - // final int numCols = getNumCols(); - - // if(numVals >= 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) { - // double[] cvals = preAggregate(a, 0); - // postScaling(values, cvals, c, numVals); - // } - // else { - // // iterate over all values and their bitmaps - // for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) { - // int boff = _ptr[k]; - // int blen = len(k); - - // double vsum = 0; - // int curRunEnd = 0; - // for(int bix = 0; bix < blen; bix += 2) { - // int curRunStartOff = curRunEnd + _data[boff + bix]; - // int curRunLen = _data[boff + bix + 1]; - // vsum += LinearAlgebraUtils.vectSum(a, curRunStartOff, curRunLen); - // curRunEnd = curRunStartOff + curRunLen; - // } - - // // scale partial results by values and write results - // for(int j = 0; j < numCols; j++) - // c[_colIndexes[j]] += vsum * values[valOff + j]; - // } - // } + // final int numCols = getNumCols(); + + // if(numVals >= 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) { + // double[] cvals = preAggregate(a, 0); + // postScaling(values, cvals, c, numVals); + // } + // else { + // // iterate over all values and their bitmaps + // for(int k = 0, valOff = 0; k < numVals; k++, valOff += numCols) { + // int boff = _ptr[k]; + // int blen = len(k); + + // double vsum = 0; + // int curRunEnd = 0; + // for(int bix = 0; bix < blen; bix += 2) { + // int curRunStartOff = curRunEnd + _data[boff + bix]; + // int curRunLen = _data[boff + bix + 1]; + // vsum += LinearAlgebraUtils.vectSum(a, curRunStartOff, curRunLen); + // curRunEnd = curRunStartOff + curRunLen; + // } + + // // scale partial results by values and write results + // for(int j = 0; j < numCols; j++) + // c[_colIndexes[j]] += vsum * values[valOff + j]; + // } + // } // } // @Override // public void leftMultByMatrix(final double[] a, final double[] c, final double[] values, final int numRows, - // final int numCols, int rl, final int ru, final int vOff) { - - // final int numVals = getNumValues(); - // if(numVals >= 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) { - // for(int i = rl; i < ru; i++) { - // double[] cvals = preAggregate(a, i); - // postScaling(values, cvals, c, numVals, i, numCols); - // } - // } - // else { - // // iterate over all values and their bitmaps - // for(int i = rl, off = vOff * _numRows; i < ru; i++, off += _numRows) { - // int offC = i * numCols; - // int valOff = 0; - // for(int k = 0; k < numVals; k++) { - // int boff = _ptr[k]; - // int blen = len(k); - - // double vsum = 0; - // int curRunEnd = 0; - // for(int bix = 0; bix < blen; bix += 2) { - // int curRunStartOff = curRunEnd + _data[boff + bix]; - // int curRunLen = _data[boff + bix + 1]; - // vsum += LinearAlgebraUtils.vectSum(a, curRunStartOff + off, curRunLen); - // curRunEnd = curRunStartOff + curRunLen; - // } - - // for(int j = 0; j < _colIndexes.length; j++) { - // int colIx = _colIndexes[j] + offC; - // // scale partial results by values and write results - // c[colIx] += vsum * values[valOff++]; - // } - // } - // } - // } + // final int numCols, int rl, final int ru, final int vOff) { + + // final int numVals = getNumValues(); + // if(numVals >= 1 && _numRows > CompressionSettings.BITMAP_BLOCK_SZ) { + // for(int i = rl; i < ru; i++) { + // double[] cvals = preAggregate(a, i); + // postScaling(values, cvals, c, numVals, i, numCols); + // } + // } + // else { + // // iterate over all values and their bitmaps + // for(int i = rl, off = vOff * _numRows; i < ru; i++, off += _numRows) { + // int offC = i * numCols; + // int valOff = 0; + // for(int k = 0; k < numVals; k++) { + // int boff = _ptr[k]; + // int blen = len(k); + + // double vsum = 0; + // int curRunEnd = 0; + // for(int bix = 0; bix < blen; bix += 2) { + // int curRunStartOff = curRunEnd + _data[boff + bix]; + // int curRunLen = _data[boff + bix + 1]; + // vsum += LinearAlgebraUtils.vectSum(a, curRunStartOff + off, curRunLen); + // curRunEnd = curRunStartOff + curRunLen; + // } + + // for(int j = 0; j < _colIndexes.length; j++) { + // int colIx = _colIndexes[j] + offC; + // // scale partial results by values and write results + // c[colIx] += vsum * values[valOff++]; + // } + // } + // } + // } // } // @Override - // public void leftMultBySparseMatrix(SparseBlock sb, double[] c, double[] values, int numRows, int numCols, int row) { - - // final int numVals = getNumValues(); - // int sparseEndIndex = sb.size(row) + sb.pos(row); - // int[] indexes = sb.indexes(row); - // double[] sparseV = sb.values(row); - // for(int k = 0, valOff = 0; k < numVals; k++, valOff += _colIndexes.length) { - // int boff = _ptr[k]; - // int blen = len(k); - - // double vsum = 0; - // int pointSparse = sb.pos(row); - // int curRunEnd = 0; - // for(int bix = 0; bix < blen; bix += 2) { - // int curRunStartOff = curRunEnd + _data[boff + bix]; - // int curRunLen = _data[boff + bix + 1]; - // curRunEnd = curRunStartOff + curRunLen; - // while(pointSparse < sparseEndIndex && indexes[pointSparse] < curRunStartOff) { - // pointSparse++; - // } - // while(pointSparse != sparseEndIndex && indexes[pointSparse] >= curRunStartOff && - // indexes[pointSparse] < curRunEnd) { - // vsum += sparseV[pointSparse++]; - // } - // if(pointSparse == sparseEndIndex) { - // break; - // } - // } - - // for(int j = 0; j < _colIndexes.length; j++) { - // int Voff = _colIndexes[j] + row * numCols; - // c[Voff] += vsum * values[valOff + j]; - // } - // } + // public void leftMultBySparseMatrix(SparseBlock sb, double[] c, double[] values, int numRows, int numCols, int + // row) { + + // final int numVals = getNumValues(); + // int sparseEndIndex = sb.size(row) + sb.pos(row); + // int[] indexes = sb.indexes(row); + // double[] sparseV = sb.values(row); + // for(int k = 0, valOff = 0; k < numVals; k++, valOff += _colIndexes.length) { + // int boff = _ptr[k]; + // int blen = len(k); + + // double vsum = 0; + // int pointSparse = sb.pos(row); + // int curRunEnd = 0; + // for(int bix = 0; bix < blen; bix += 2) { + // int curRunStartOff = curRunEnd + _data[boff + bix]; + // int curRunLen = _data[boff + bix + 1]; + // curRunEnd = curRunStartOff + curRunLen; + // while(pointSparse < sparseEndIndex && indexes[pointSparse] < curRunStartOff) { + // pointSparse++; + // } + // while(pointSparse != sparseEndIndex && indexes[pointSparse] >= curRunStartOff && + // indexes[pointSparse] < curRunEnd) { + // vsum += sparseV[pointSparse++]; + // } + // if(pointSparse == sparseEndIndex) { + // break; + // } + // } + + // for(int j = 0; j < _colIndexes.length; j++) { + // int Voff = _colIndexes[j] + row * numCols; + // c[Voff] += vsum * values[valOff + j]; + // } + // } // } @@ -1091,7 +1067,7 @@ public class ColGroupRLE extends ColGroupOffset { lenL = _data[boffL + bixL + 1]; final int endL = startL + lenL; for(int i = startL; i < endL; i++) - ag.increment(lhs.getIndex(i) + offKr); + ag.increment(lhs._data.getIndex(i) + offKr); } } @@ -1169,7 +1145,7 @@ public class ColGroupRLE extends ColGroupOffset { } @Override - public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret) { + public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret, boolean preModified) { throw new NotImplementedException(); } @@ -1182,4 +1158,9 @@ public class ColGroupRLE extends ColGroupOffset { public Dictionary preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { throw new NotImplementedException(); } + + @Override + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified){ + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java index 5e2d303..1acbfdc 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDC.java @@ -61,7 +61,7 @@ public class ColGroupSDC extends ColGroupValue { */ protected AMapToData _data; - /** + /** * Constructor for serialization * * @param numRows Number of rows contained @@ -405,7 +405,7 @@ public class ColGroupSDC extends ColGroupValue { int row; for(; i < this._numRows && it.hasNext(); i++) { - int col = lhs.getIndex(i); + int col = lhs._data.getIndex(i); if(it.value() == i) row = getIndex(it.getDataIndexAndIncrement()); else @@ -414,7 +414,7 @@ public class ColGroupSDC extends ColGroupValue { } row = offsetToDefault; for(; i < this._numRows; i++) { - int col = lhs.getIndex(i); + int col = lhs._data.getIndex(i); ag.increment(col + row * nCol); } @@ -612,54 +612,77 @@ public class ColGroupSDC extends ColGroupValue { for(; i < _numRows && it.hasNext(); i++) { int to = (it.value() == i) ? getIndex(it.getDataIndexAndIncrement()) : offsetToDefault; - that._dict.addToEntry(ret, that.getIndex(i), to, nCol); + that._dict.addToEntry(ret, that._data.getIndex(i), to, nCol); } for(; i < _numRows; i++) - that._dict.addToEntry(ret, that.getIndex(i), offsetToDefault, nCol); + that._dict.addToEntry(ret, that._data.getIndex(i), offsetToDefault, nCol); return ret; } @Override - public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret) { + public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret, boolean preModified) { final AIterator itThat = that._indexes.getIterator(); final AIterator itThis = _indexes.getIterator(); + final int nCol = that._colIndexes.length; final int offsetToDefaultThat = that.getNumValues() - 1; final int offsetToDefaultThis = getNumValues() - 1; - final int nCol = that._colIndexes.length; - - int i = 0; - - for(; i < _numRows && itThat.hasNext() && itThis.hasNext(); i++) { - int to = (itThis.value() == i) ? getIndex(itThis.getDataIndexAndIncrement()) : offsetToDefaultThis; - int fr = (itThat.value() == i) ? that.getIndex(itThat.getDataIndexAndIncrement()) : offsetToDefaultThat; - that._dict.addToEntry(ret, fr, to, nCol); - } - if(itThat.hasNext()) { - for(; i < _numRows && itThat.hasNext(); i++) { - int fr = (itThat.value() == i) ? that.getIndex(itThat.getDataIndexAndIncrement()) : offsetToDefaultThat; + if(preModified) { + while(itThat.hasNext() && itThis.hasNext()) { + if(itThat.value() == itThis.value()) { + final int fr = that.getIndex(itThat.getDataIndexAndIncrement()); + final int to = getIndex(itThis.getDataIndexAndIncrement()); + that._dict.addToEntry(ret, fr, to, nCol); + } + else if(itThat.value() < itThis.value()) { + final int fr = that.getIndex(itThat.getDataIndexAndIncrement()); + that._dict.addToEntry(ret, fr, offsetToDefaultThis, nCol); + } + else + itThis.next(); + } + + while(itThat.hasNext()) { + final int fr = that.getIndex(itThat.getDataIndexAndIncrement()); that._dict.addToEntry(ret, fr, offsetToDefaultThis, nCol); } } + else { + int i = 0; - if(itThis.hasNext()) { - for(; i < _numRows && itThis.hasNext(); i++) { + for(; i < _numRows && itThat.hasNext() && itThis.hasNext(); i++) { int to = (itThis.value() == i) ? getIndex(itThis.getDataIndexAndIncrement()) : offsetToDefaultThis; - that._dict.addToEntry(ret, offsetToDefaultThat, to, nCol); + int fr = (itThat.value() == i) ? that.getIndex(itThat.getDataIndexAndIncrement()) : offsetToDefaultThat; + that._dict.addToEntry(ret, fr, to, nCol); + } + + if(itThat.hasNext()) { + for(; i < _numRows && itThat.hasNext(); i++) { + int fr = (itThat.value() == i) ? that + .getIndex(itThat.getDataIndexAndIncrement()) : offsetToDefaultThat; + that._dict.addToEntry(ret, fr, offsetToDefaultThis, nCol); + } } + + if(itThis.hasNext()) { + for(; i < _numRows && itThis.hasNext(); i++) { + int to = (itThis.value() == i) ? getIndex(itThis.getDataIndexAndIncrement()) : offsetToDefaultThis; + that._dict.addToEntry(ret, offsetToDefaultThat, to, nCol); + } + } + + for(; i < _numRows; i++) + that._dict.addToEntry(ret, offsetToDefaultThat, offsetToDefaultThis, nCol); } - for(; i < _numRows; i++) - that._dict.addToEntry(ret, offsetToDefaultThat, offsetToDefaultThis, nCol); - return ret; } @Override public Dictionary preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { - + final AIterator itThat = that._indexes.getIterator(); final AIterator itThis = _indexes.getIterator(); final int nCol = that._colIndexes.length; @@ -671,7 +694,7 @@ public class ColGroupSDC extends ColGroupValue { final int to = getIndex(itThis.getDataIndexAndIncrement()); that._dict.addToEntry(ret, fr, to, nCol); } - else if(itThat.value() < itThis.value()){ + else if(itThat.value() < itThis.value()) { final int fr = that.getIndex(itThat.getDataIndexAndIncrement()); that._dict.addToEntry(ret, fr, defThis, nCol); } @@ -679,7 +702,7 @@ public class ColGroupSDC extends ColGroupValue { itThis.next(); } - while(itThat.hasNext()){ + while(itThat.hasNext()) { final int fr = that.getIndex(itThat.getDataIndexAndIncrement()); that._dict.addToEntry(ret, fr, defThis, nCol); } @@ -688,7 +711,12 @@ public class ColGroupSDC extends ColGroupValue { } @Override - public Dictionary preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret){ + public Dictionary preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { + throw new NotImplementedException(); + } + + @Override + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified) { throw new NotImplementedException(); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java index 82e5c81..85aab80 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingle.java @@ -243,19 +243,24 @@ public class ColGroupSDCSingle extends ColGroupValue { @Override public int[] getCounts(int[] counts) { - final AIterator it = _indexes.getIterator(); - - while(it.hasNext()) { - it.next(); - counts[1]++; - } - counts[0] = _numRows - counts[1]; + counts[0] = _indexes.getSize(); + counts[1] = _numRows - counts[0]; return counts; } @Override public int[] getCounts(int rl, int ru, int[] counts) { - throw new NotImplementedException("Not Implemented"); + final AIterator it = _indexes.getIterator(); + it.skipTo(rl); + + while(it.hasNext() && it.value() < ru) { + it.next(); + counts[0]++; + } + + counts[1] = ru - rl - counts[0]; + + return counts; } public double[] preAggregate(double[] a, int row) { @@ -275,7 +280,7 @@ public class ColGroupSDCSingle extends ColGroupValue { for(; i < _numRows; i++, offA++) vals[0] += a[offA]; } - else{ + else { for(; i < _numRows && it.hasNext(); i++) if(it.value() == i) vals[1] += a[i]; @@ -294,10 +299,10 @@ public class ColGroupSDCSingle extends ColGroupValue { final int[] indexes = sb.indexes(row); final double[] sparseV = sb.values(row); final AIterator it = _indexes.getIterator(); - + for(int i = sb.pos(row); i < sb.size(row) + sb.pos(row); i++) { it.skipTo(indexes[i]); - if(it.value() == indexes[i]){ + if(it.value() == indexes[i]) { vals[0] += sparseV[i]; it.next(); } @@ -309,7 +314,7 @@ public class ColGroupSDCSingle extends ColGroupValue { @Override public long estimateInMemorySize() { - long size = ColGroupSizes.estimateInMemorySizeGroupValue(_colIndexes.length, getNumValues(), isLossy()); + long size = ColGroupSizes.estimateInMemorySizeGroupValue(_colIndexes.length, getNumValues(), isLossy()); size += _indexes.getInMemorySize(); return size; } @@ -375,8 +380,8 @@ public class ColGroupSDCSingle extends ColGroupValue { int row; for(; i < this._numRows && it.hasNext(); i++) { - int col = lhs.getIndex(i); - if(it.value() == i){ + int col = lhs._data.getIndex(i); + if(it.value() == i) { row = 1; it.next(); } @@ -387,7 +392,7 @@ public class ColGroupSDCSingle extends ColGroupValue { } row = 0; for(; i < this._numRows; i++) { - int col = lhs.getIndex(i); + int col = lhs._data.getIndex(i); if(col < lhs.getNumValues()) ag.increment(col + row * nCol); } @@ -459,7 +464,8 @@ public class ColGroupSDCSingle extends ColGroupValue { final int rhsNV = this.getNumValues(); final int retSize = lhsNV * rhsNV; final int nCol = lhs.getNumValues(); - IPreAggregate ag = PreAggregateFactory.ag(retSize);; + IPreAggregate ag = PreAggregateFactory.ag(retSize); + ; final AIterator lIt = lhs._indexes.getIterator(); final AIterator rIt = _indexes.getIterator(); @@ -473,7 +479,7 @@ public class ColGroupSDCSingle extends ColGroupValue { } else col = 0; - if(rIt.value() == i){ + if(rIt.value() == i) { row = 1; rIt.next(); } @@ -482,9 +488,9 @@ public class ColGroupSDCSingle extends ColGroupValue { ag.increment(col + row * nCol); } - if(lIt.hasNext() ) { + if(lIt.hasNext()) { row = 1; - for(; i < _numRows && lIt.hasNext() ; i++) { + for(; i < _numRows && lIt.hasNext(); i++) { if(lIt.value() == i) { col = 1; lIt.next(); @@ -496,10 +502,10 @@ public class ColGroupSDCSingle extends ColGroupValue { } } - if( rIt.hasNext()) { + if(rIt.hasNext()) { col = 1; for(; i < _numRows && rIt.hasNext(); i++) { - if(rIt.value()== i) { + if(rIt.value() == i) { row = 1; rIt.next(); } @@ -558,18 +564,91 @@ public class ColGroupSDCSingle extends ColGroupValue { } @Override - public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret) { + public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret, boolean preModified) { throw new NotImplementedException(); } @Override - public Dictionary preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret){ + public Dictionary preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret) { throw new NotImplementedException(); } - + @Override - public Dictionary preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret){ + public Dictionary preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret) { throw new NotImplementedException(); } + @Override + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified) { + final AIterator itThat = that._indexes.getIterator(); + final AIterator itThis = _indexes.getIterator(); + final int nCol = that._colIndexes.length; + + if(preModified) { + + while(itThat.hasNext() && itThis.hasNext()) { + if(itThat.value() == itThis.value()) { + itThat.next(); + itThis.next(); + that._dict.addToEntry(ret, 1, 0, nCol); + } + else if(itThat.value() < itThis.value()) { + itThat.next(); + // that._dict.addToEntry(ret, 0, 1, nCol); + } + else + itThis.next(); + } + + // while(itThat.hasNext()) { + // final int fr = that.getIndex(itThat.getDataIndexAndIncrement()); + // that._dict.addToEntry(ret, fr, 1, nCol); + // } + return ret; + } + else { + int i = 0; + for(; i < _numRows && itThat.hasNext() && itThis.hasNext(); i++) { + int to = 0; + if(itThis.value() == i) { + itThis.next(); + to = 1; + } + int fr = 0; + if(itThat.value() == i) { + itThat.next(); + fr = 1; + } + that._dict.addToEntry(ret, fr, to, nCol); + } + + if(itThat.hasNext()) { + for(; i < _numRows && itThat.hasNext(); i++) { + int fr = 0; + if(itThat.value() == i) { + itThat.next(); + fr = 1; + } + that._dict.addToEntry(ret, fr, 1, nCol); + } + } + + if(itThis.hasNext()) { + for(; i < _numRows && itThis.hasNext(); i++) { + int to = 0; + if(itThis.value() == i) { + itThis.next(); + to = 1; + } + that._dict.addToEntry(ret, 1, to, nCol); + } + } + + for(; i < _numRows; i++) + that._dict.addToEntry(ret, 1, 1, nCol); + + return ret; + } + + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java index faf8955..34d148d 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCSingleZeros.java @@ -67,7 +67,7 @@ public class ColGroupSDCSingleZeros extends ColGroupValue { int[] cachedCounts) { super(colIndices, numRows, dict, cachedCounts); _indexes = OffsetFactory.create(indexes, numRows); - _zeros = false; + _zeros = true; } protected ColGroupSDCSingleZeros(int[] colIndices, int numRows, ADictionary dict, AOffset offsets, @@ -196,25 +196,22 @@ public class ColGroupSDCSingleZeros extends ColGroupValue { @Override public int[] getCounts(int[] counts) { - return getCounts(0, _numRows, counts); + counts[0] = _indexes.getSize(); + counts[1] = _numRows - counts[0]; + return counts; } @Override public int[] getCounts(int rl, int ru, int[] counts) { - int i = rl; final AIterator it = _indexes.getIterator(); it.skipTo(rl); - int zeros = 0; while(it.hasNext() && it.value() < ru) { - int oldI = i; - i = it.value(); it.next(); - zeros += i - oldI - 1; counts[0]++; } - counts[counts.length - 1] += zeros + ru - i; + counts[1] = ru - rl - counts[0]; return counts; } @@ -287,8 +284,8 @@ public class ColGroupSDCSingleZeros extends ColGroupValue { return new ColGroupSDCSingleZeros(_colIndexes, _numRows, applyBinaryRowOp(op.fn, v, sparseSafe, left), _indexes, getCachedCounts()); else { - ADictionary aDictionary = swapEntries(applyBinaryRowOp(op.fn, v, sparseSafe, left)); - return new ColGroupSDCSingle(_colIndexes, _numRows, aDictionary, _indexes, null); + ADictionary aDictionary = applyBinaryRowOp(op.fn, v, sparseSafe, left); + return new ColGroupSDCSingle(_colIndexes, _numRows, aDictionary, _indexes, getCachedCounts()); } } @@ -348,7 +345,7 @@ public class ColGroupSDCSingleZeros extends ColGroupValue { final AIterator it = _indexes.getIterator(); while(it.hasNext()) { - final int col = lhs.getIndex(it.value()); + final int col = lhs._data.getIndex(it.value()); ag.increment(col); } return ag; @@ -427,7 +424,7 @@ public class ColGroupSDCSingleZeros extends ColGroupValue { } @Override - public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret) { + public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret, boolean preModified) { throw new NotImplementedException(); } @@ -451,4 +448,9 @@ public class ColGroupSDCSingleZeros extends ColGroupValue { return ret; } + + @Override + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, boolean preModified){ + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java index 3f6b8d3..410ec90 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupSDCZeros.java @@ -358,7 +358,7 @@ public class ColGroupSDCZeros extends ColGroupValue { final AIterator it = _indexes.getIterator(); while(it.hasNext()) { - final int col = lhs.getIndex(it.value()); + final int col = lhs._data.getIndex(it.value()); final int row = getIndex(it.getDataIndexAndIncrement()); ag.increment(col + row * nCol); } @@ -482,7 +482,7 @@ public class ColGroupSDCZeros extends ColGroupValue { final int nCol = that._colIndexes.length; while(itThis.hasNext()) { - final int fr = that.getIndex(itThis.value()); + final int fr = that._data.getIndex(itThis.value()); final int to = getIndex(itThis.getDataIndexAndIncrement()); that._dict.addToEntry(ret, fr, to, nCol); } @@ -491,7 +491,7 @@ public class ColGroupSDCZeros extends ColGroupValue { } @Override - public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret) { + public Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret, boolean preModified) { throw new NotImplementedException(); } @@ -534,4 +534,9 @@ public class ColGroupSDCZeros extends ColGroupValue { } return ret; } + + @Override + public Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary re, boolean preModified){ + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java index b13078c..a848031 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupUncompressed.java @@ -25,7 +25,6 @@ import java.io.IOException; import java.util.Arrays; import org.apache.commons.lang.NotImplementedException; -import org.apache.sysds.runtime.DMLCompressionException; import org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; import org.apache.sysds.runtime.data.DenseBlock; import org.apache.sysds.runtime.data.DenseBlockFP64; @@ -318,16 +317,15 @@ public class ColGroupUncompressed extends AColGroup { public void leftMultByMatrix(MatrixBlock matrix, double[] result, int numCols, int rl, int ru) { - MatrixBlock tmpRet = new MatrixBlock(ru - rl, _data.getNumColumns(), false); + final MatrixBlock tmpRet = new MatrixBlock(ru - rl, _data.getNumColumns(), false); tmpRet.allocateDenseBlock(); - MatrixBlock leftSlice = matrix.slice(rl, ru - 1, false); + final MatrixBlock leftSlice = matrix.slice(rl, ru - 1, false); LibMatrixMult.matrixMult(leftSlice, _data, tmpRet); int offT = numCols * rl; - if(tmpRet.isEmpty()) return; - if(tmpRet.isInSparseFormat()) { - SparseBlock sb = tmpRet.getSparseBlock(); + else if(tmpRet.isInSparseFormat()) { + final SparseBlock sb = tmpRet.getSparseBlock(); for(int rowIdx = 0; rowIdx < ru - rl; rowIdx++, offT += numCols) { if(!sb.isEmpty(rowIdx)) { final int apos = sb.pos(rowIdx); @@ -340,33 +338,10 @@ public class ColGroupUncompressed extends AColGroup { } } else { - double[] tmpRetV = tmpRet.getDenseBlockValues(); - for(int j = rl, offTemp = 0; j < ru; j++, offTemp += _colIndexes.length, offT += numCols) { + final double[] tmpRetV = tmpRet.getDenseBlockValues(); + for(int j = rl, offTemp = 0; j < ru; j++, offTemp += _colIndexes.length, offT += numCols) for(int i = 0; i < _colIndexes.length; i++) result[offT + _colIndexes[i]] += tmpRetV[offTemp + i]; - } - } - } - - public void rightMultByMatrix(int[] outputColumns, double[] preAggregatedB, double[] c, int thatNrColumns, int rl, - int ru) { - throw new NotImplementedException("Should not be called use other matrix function for uncompressed columns"); - } - - public double computeMxx(double c, Builtin builtin) { - throw new NotImplementedException("Not implemented max min on uncompressed"); - } - - public void leftMultByMatrix(MatrixBlock matrix, MatrixBlock result) { - MatrixBlock pret = new MatrixBlock(matrix.getNumRows(), _colIndexes.length, false); - LibMatrixMult.matrixMult(matrix, _data, pret); - - // copying partialResult to the proper indices of the result - if(!pret.isEmptyBlock(false)) { - double[] rsltArr = result.getDenseBlockValues(); - for(int colIx = 0; colIx < _colIndexes.length; colIx++) - rsltArr[_colIndexes[colIx]] = pret.quickGetValue(0, colIx); - result.recomputeNonZeros(); } } @@ -382,14 +357,20 @@ public class ColGroupUncompressed extends AColGroup { @Override public AColGroup binaryRowOp(BinaryOperator op, double[] v, boolean sparseSafe, boolean left) { - DenseBlock b = new DenseBlockFP64(new int[] {1, v.length}, v); - MatrixBlock that = new MatrixBlock(1, v.length, b); - that.setNonZeros(v.length); + double[] selectedValues = new double[_colIndexes.length]; + for(int i = 0; i < _colIndexes.length; i++) { + selectedValues[i] = v[_colIndexes[i]]; + } + DenseBlock b = new DenseBlockFP64(new int[] {1, _colIndexes.length}, selectedValues); + MatrixBlock that = new MatrixBlock(1, _colIndexes.length, b); + that.setNonZeros(_colIndexes.length); MatrixBlock resultBlock = new MatrixBlock(); + if(left) that.binaryOperations(op, _data, resultBlock); else _data.binaryOperations(op, that, resultBlock); + return new ColGroupUncompressed(_colIndexes, resultBlock, false); } @@ -557,7 +538,10 @@ public class ColGroupUncompressed extends AColGroup { @Override public AColGroup copy() { - throw new NotImplementedException("Not implemented copy of uncompressed colGroup yet."); + MatrixBlock newData = new MatrixBlock(_data.getNumRows(), _data.getNumColumns(), _data.isInSparseFormat()); + // _data.copy(newData); + newData.copy(_data); + return new ColGroupUncompressed(_colIndexes, newData, false); } @Override @@ -585,7 +569,49 @@ public class ColGroupUncompressed extends AColGroup { if(lhs instanceof ColGroupEmpty) return; if(lhs instanceof ColGroupUncompressed) { - throw new DMLCompressionException("Not Implemented"); + ColGroupUncompressed lhsUC = (ColGroupUncompressed) lhs; + MatrixBlock tmpRet = new MatrixBlock(_colIndexes.length, _colIndexes.length, 0); + + if(lhsUC._data == this._data) { + + LibMatrixMult.matrixMultTransposeSelf(this._data, tmpRet, true, + InfrastructureAnalyzer.getLocalParallelism()); + } + else { + LOG.warn("Inefficient Left Matrix Multiplication with transpose of left hand side : t(l) %*% r"); + MatrixBlock lhData = lhsUC._data; + MatrixBlock transposed = new MatrixBlock(lhData.getNumColumns(), lhData.getNumRows(), false); + LibMatrixReorg.transpose(lhData, transposed); + transposed.setNonZeros(lhData.getNonZeros()); + // do transposed left hand side, matrix multiplication. + LibMatrixMult.matrixMult(transposed, this._data, tmpRet); + } + + if(tmpRet.isEmpty()) + return; + else if(tmpRet.isInSparseFormat()) { + SparseBlock sb = tmpRet.getSparseBlock(); + for(int rowIdx = 0, offT = 0; rowIdx < tmpRet.getNumRows(); rowIdx++, offT += numCols) { + if(!sb.isEmpty(rowIdx)) { + final int apos = sb.pos(rowIdx); + final int alen = sb.size(rowIdx) + apos; + final int[] aix = sb.indexes(rowIdx); + final double[] avals = sb.values(rowIdx); + for(int col = apos; col < alen; col++) + result[offT + _colIndexes[aix[col]]] += avals[col]; + } + } + } + else { + double[] tmpRetV = tmpRet.getDenseBlockValues(); + for(int j = 0, offTemp = 0, offT = 0; + j < tmpRet.getNumRows(); + j++, offTemp += _colIndexes.length, offT += numCols) { + for(int i = 0; i < _colIndexes.length; i++) + result[offT + _colIndexes[i]] += tmpRetV[offTemp + i]; + } + } + } else { diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java index 0eeeb13..223c3a0 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/ColGroupValue.java @@ -30,21 +30,14 @@ import org.apache.commons.lang.NotImplementedException; import org.apache.commons.lang3.tuple.ImmutablePair; import org.apache.commons.lang3.tuple.Pair; import org.apache.sysds.runtime.DMLCompressionException; -import org.apache.sysds.runtime.compress.CompressionSettings; import org.apache.sysds.runtime.compress.colgroup.dictionary.ADictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.Dictionary; import org.apache.sysds.runtime.compress.colgroup.dictionary.DictionaryFactory; -import org.apache.sysds.runtime.compress.colgroup.dictionary.QDictionary; import org.apache.sysds.runtime.compress.colgroup.pre.ArrPreAggregate; import org.apache.sysds.runtime.compress.colgroup.pre.IPreAggregate; -import org.apache.sysds.runtime.compress.colgroup.pre.MapPreAggregate; -import org.apache.sysds.runtime.compress.utils.ABitmap; -import org.apache.sysds.runtime.compress.utils.Bitmap; -import org.apache.sysds.runtime.compress.utils.BitmapLossy; import org.apache.sysds.runtime.controlprogram.parfor.stat.InfrastructureAnalyzer; import org.apache.sysds.runtime.data.SparseBlock; import org.apache.sysds.runtime.functionobjects.Builtin; -import org.apache.sysds.runtime.functionobjects.Builtin.BuiltinCode; import org.apache.sysds.runtime.functionobjects.ValueFunction; import org.apache.sysds.runtime.matrix.data.LibMatrixReorg; import org.apache.sysds.runtime.matrix.data.MatrixBlock; @@ -81,34 +74,6 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea super(numRows); } - /** - * Main constructor for the ColGroupValues. Used to contain the dictionaries used for the different types of - * ColGroup. - * - * @param colIndices indices (within the block) of the columns included in this column - * @param numRows total number of rows in the parent block - * @param ubm Uncompressed bitmap representation of the block - * @param cs The Compression settings used for compression - */ - protected ColGroupValue(int[] colIndices, int numRows, ABitmap ubm, CompressionSettings cs) { - super(colIndices, numRows); - - _zeros = ubm.getNumOffsets() < (long) numRows; - // sort values by frequency, if requested - if(cs.sortValuesByLength && numRows > CompressionSettings.BITMAP_BLOCK_SZ) - ubm.sortValuesByFrequency(); - - switch(ubm.getType()) { - case Full: - _dict = new Dictionary(((Bitmap) ubm).getValues()); - break; - case Lossy: - _dict = new QDictionary((BitmapLossy) ubm).makeDoubleDictionary(); - // _lossy = true; - break; - } - } - protected ColGroupValue(int[] colIndices, int numRows, ADictionary dict, int[] cachedCounts) { super(colIndices, numRows); _dict = dict; @@ -130,20 +95,21 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea * * @return the number of distinct sets of values associated with the bitmaps in this column group */ - public int getNumValues() { + public final int getNumValues() { return _dict.getNumberOfValues(_colIndexes.length); } @Override - public double[] getValues() { + public final double[] getValues() { return _dict != null ? _dict.getValues() : null; } - public ADictionary getDictionary() { + public final ADictionary getDictionary() { return _dict; } - public void addMinMax(double[] ret) { + @Override + public final void addMinMax(double[] ret) { _dict.addMaxAndMin(ret, _colIndexes); } @@ -362,14 +328,6 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea return ret; } - public double getMin() { - return computeMxx(Double.POSITIVE_INFINITY, Builtin.getBuiltinFnObject(BuiltinCode.MIN)); - } - - public double getMax() { - return computeMxx(Double.NEGATIVE_INFINITY, Builtin.getBuiltinFnObject(BuiltinCode.MAX)); - } - protected double computeMxx(double c, Builtin builtin) { if(_zeros) c = builtin.execute(c, 0); @@ -487,11 +445,11 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea @Override public String toString() { StringBuilder sb = new StringBuilder(); + sb.append(" Is Lossy: " + _dict.isLossy() + " num Rows: " + getNumRows() + " contain zero row:" + _zeros); sb.append(super.toString()); - sb.append("Is Lossy: " + _dict.isLossy() + " num Rows: " + getNumRows() + " contain zero row:" + _zeros); if(_dict != null) { sb.append(String.format("\n%15s%5d ", "Values:", _dict.getValues().length)); - _dict.getString(sb, _colIndexes.length); + sb.append(_dict.getString(_colIndexes.length)); } return sb.toString(); } @@ -556,6 +514,30 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea return super.clone(); } + public AColGroup copyAndSet(double[] newDictionary) { + try { + ColGroupValue clone = (ColGroupValue) this.clone(); + clone.setDictionary(new Dictionary(newDictionary)); + return clone; + } + catch(CloneNotSupportedException e) { + e.printStackTrace(); + } + return null; + } + + public AColGroup copyAndSet(ADictionary newDictionary) { + try { + ColGroupValue clone = (ColGroupValue) this.clone(); + clone.setDictionary(newDictionary); + return clone; + } + catch(CloneNotSupportedException e) { + e.printStackTrace(); + } + return null; + } + public AColGroup copyAndSet(int[] colIndexes, double[] newDictionary) { try { ColGroupValue clone = (ColGroupValue) this.clone(); @@ -594,7 +576,6 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea return null; } - @Override protected AColGroup sliceSingleColumn(int idx) { ColGroupValue ret = (ColGroupValue) copy(); @@ -667,12 +648,12 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea */ protected void postScaling(double[] dictValues, double[] vals, double[] c, int numVals, int row, int totalCols, int offT) { - final int ncol = getNumCols(); + final int nCol = getNumCols(); int valOff = 0; for(int k = 0; k < numVals; k++) { double aval = vals[k]; - for(int j = 0; j < ncol; j++) { + for(int j = 0; j < nCol; j++) { int colIx = _colIndexes[j] + row * totalCols; c[offT + colIx] += aval * dictValues[valOff++]; } @@ -771,21 +752,27 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea /** * Pre aggregate into a dictionary. It is assumed that "that" have more distinct values than, "this". * - * @param that the other column group whose indexes are used for aggregation. + * @param that the other column group whose indexes are used for aggregation. + * @param preModify specifies if the matrix in this * @return A aggregate dictionary */ - public Dictionary preAggregateThatIndexStructure(ColGroupValue that) { + public Dictionary preAggregateThatIndexStructure(ColGroupValue that, boolean preModify) { + int outputLength = that._colIndexes.length * this.getNumValues(); + Dictionary ret = new Dictionary(new double[outputLength]); - Dictionary ret = new Dictionary(new double[that._colIndexes.length * this.getNumValues()]); + // if(preModify) + // LOG.error(preModify + " " + that.getClass().getSimpleName() + " in " + this.getClass().getSimpleName()); if(that instanceof ColGroupDDC) return preAggregateThatDDCStructure((ColGroupDDC) that, ret); else if(that instanceof ColGroupSDC) - return preAggregateThatSDCStructure((ColGroupSDC) that, ret); - else if(that instanceof ColGroupSDCZeros) - return preAggregateThatSDCZerosStructure((ColGroupSDCZeros) that, ret); + return preAggregateThatSDCStructure((ColGroupSDC) that, ret, preModify); + else if(that instanceof ColGroupSDCSingle) + return preAggregateThatSDCSingleStructure((ColGroupSDCSingle) that, ret, preModify); else if(that instanceof ColGroupSDCSingleZeros) return preAggregateThatSDCSingleZerosStructure((ColGroupSDCSingleZeros) that, ret); + else if(that instanceof ColGroupSDCZeros) + return preAggregateThatSDCZerosStructure((ColGroupSDCZeros) that, ret); else if(that instanceof ColGroupConst) return preAggregateThatConstStructure((ColGroupConst) that, ret); @@ -795,15 +782,17 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea public abstract Dictionary preAggregateThatDDCStructure(ColGroupDDC that, Dictionary ret); - public abstract Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret); + public abstract Dictionary preAggregateThatSDCStructure(ColGroupSDC that, Dictionary ret, boolean preModified); public abstract Dictionary preAggregateThatSDCZerosStructure(ColGroupSDCZeros that, Dictionary ret); public abstract Dictionary preAggregateThatSDCSingleZerosStructure(ColGroupSDCSingleZeros that, Dictionary ret); + public abstract Dictionary preAggregateThatSDCSingleStructure(ColGroupSDCSingle that, Dictionary ret, + boolean preModified); + public Dictionary preAggregateThatConstStructure(ColGroupConst that, Dictionary ret) { computeColSums(ret.getValues(), false); - LOG.error(Arrays.toString(ret.getValues())); return ret; } @@ -835,53 +824,82 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea final int lCol = lhs._colIndexes.length; final int rCol = this._colIndexes.length; + final double threshold = 0.2; + if(sameIndexStructure(lhs)) { int[] agI = getCounts(); for(int a = 0, off = 0; a < nvL; a++, off += nvL + 1) leftMultDictEntry(agI[a], off, nvL, lCol, rCol, lhs, numCols, lhValues, rhValues, result); } else if(lhs instanceof ColGroupConst || this instanceof ColGroupConst) { - IPreAggregate ag = preAggregate(lhs); - if(ag == null) - return; - else if(ag instanceof MapPreAggregate) - leftMultMapPreAggregate(nvL, lCol, rCol, lhs, numCols, lhValues, rhValues, result, - (MapPreAggregate) ag); - else - leftMultArrayPreAggregate(nvL, nvR, lCol, rCol, lhs, numCols, lhValues, rhValues, result, - ((ArrPreAggregate) ag).getArr()); + double[] r = this instanceof ColGroupConst ? rhValues : this._dict.colSum(getCounts(), rCol); + double[] l = lhs instanceof ColGroupConst ? lhValues : lhs._dict.colSum(lhs.getCounts(), lCol); + vectorVectorMultiply(l, lhs._colIndexes, r, this._colIndexes, result, numCols); } else { - if(nvR * rCol < nvL * lCol) { - Dictionary preAgg = lhs.preAggregateThatIndexStructure(this); + int[] countsRight = getCounts(); + int mostFrequentRight = Math.max(countsRight[0], countsRight[countsRight.length - 1]); + double percentageRight = (double) mostFrequentRight / this._numRows; + double skipRight = percentageRight * rCol; + int[] countsLeft = lhs.getCounts(); + int mostFrequentLeft = Math.max(countsLeft[0], countsLeft[countsLeft.length - 1]); + double percentageLeft = (double) mostFrequentLeft / this._numRows; + double skipLeft = percentageLeft * lCol; + + if(skipRight > threshold && percentageRight > percentageLeft && !(this instanceof ColGroupDDC)) { + double[] mct = this._dict.getMostCommonTuple(this.getCounts(), rCol); + double[] lhsSum = lhs._dict.colSum(lhs.getCounts(), lCol); + if(mct != null) + vectorVectorMultiply(lhsSum, lhs._colIndexes, mct, this._colIndexes, result, numCols); + + ColGroupValue thisM = (mct != null) ? (ColGroupValue) this + .copyAndSet(this._dict.subtractTuple(mct)) : this; + Dictionary preAgg = lhs.preAggregateThatIndexStructure(thisM, true); + matrixMultDictionariesAndOutputToColIndexes(lhValues, preAgg.getValues(), lhs._colIndexes, + this._colIndexes, result, numCols); + } + else if(skipLeft > threshold && !(lhs instanceof ColGroupDDC)) { + double[] mct = lhs._dict.getMostCommonTuple(lhs.getCounts(), lCol); + double[] thisColSum = this._dict.colSum(getCounts(), rCol); + if(mct != null) + vectorVectorMultiply(mct, lhs._colIndexes, thisColSum, this._colIndexes, result, numCols); + + ColGroupValue lhsM = (mct != null) ? (ColGroupValue) lhs.copyAndSet(lhs._dict.subtractTuple(mct)) : lhs; + Dictionary preAgg = this.preAggregateThatIndexStructure(lhsM, true); + matrixMultDictionariesAndOutputToColIndexes(preAgg.getValues(), rhValues, lhs._colIndexes, + this._colIndexes, result, numCols); + } + else if(nvR * rCol < nvL * lCol) { + Dictionary preAgg = lhs.preAggregateThatIndexStructure(this, false); matrixMultDictionariesAndOutputToColIndexes(lhValues, preAgg.getValues(), lhs._colIndexes, this._colIndexes, result, numCols); } else { - Dictionary preAgg = this.preAggregateThatIndexStructure(lhs); + Dictionary preAgg = this.preAggregateThatIndexStructure(lhs, false); matrixMultDictionariesAndOutputToColIndexes(preAgg.getValues(), rhValues, lhs._colIndexes, this._colIndexes, result, numCols); } } } - private void leftMultMapPreAggregate(final int nvL, final int lCol, final int rCol, final ColGroupValue lhs, - final int numCols, double[] lhValues, double[] rhValues, double[] c, MapPreAggregate agM) { - final int[] map = agM.getMap(); - final int aggSize = agM.getSize(); - for(int k = 0; k < aggSize; k += 2) - leftMultDictEntry(map[k + 1], map[k], nvL, lCol, rCol, lhs, numCols, lhValues, rhValues, c); - leftMultDictEntry(agM.getMapFreeValue(), 0, nvL, lCol, rCol, lhs, numCols, lhValues, rhValues, c); - } + // private void leftMultMapPreAggregate(final int nvL, final int lCol, final int rCol, final ColGroupValue lhs, + // final int numCols, double[] lhValues, double[] rhValues, double[] c, MapPreAggregate agM) { + // final int[] map = agM.getMap(); + // final int aggSize = agM.getSize(); + // for(int k = 0; k < aggSize; k += 2) + // leftMultDictEntry(map[k + 1], map[k], nvL, lCol, rCol, lhs, numCols, lhValues, rhValues, c); + // leftMultDictEntry(agM.getMapFreeValue(), 0, nvL, lCol, rCol, lhs, numCols, lhValues, rhValues, c); + // } - private void leftMultArrayPreAggregate(final int nvL, final int nvR, final int lCol, final int rCol, - final ColGroupValue lhs, final int numCols, double[] lhValues, double[] rhValues, double[] c, int[] arr) { - for(int a = 0; a < nvL * nvR; a++) - leftMultDictEntry(arr[a], a, nvL, lCol, rCol, lhs, numCols, lhValues, rhValues, c); - } + // private void leftMultArrayPreAggregate(final int nvL, final int nvR, final int lCol, final int rCol, + // final ColGroupValue lhs, final int numCols, double[] lhValues, double[] rhValues, double[] c, int[] arr) { + // for(int a = 0; a < nvL * nvR; a++) + // leftMultDictEntry(arr[a], a, nvL, lCol, rCol, lhs, numCols, lhValues, rhValues, c); + // } private void leftMultDictEntry(final int m, final int a, final int nvL, final int lCol, final int rCol, - final ColGroupValue lhs, final int numCols, double[] lhValues, double[] rhValues, double[] c) { + final ColGroupValue lhs, final int numCols, final double[] lhValues, final double[] rhValues, + final double[] c) { if(m > 0) { final int lhsRowOffset = (a % nvL) * lCol; @@ -930,27 +948,90 @@ public abstract class ColGroupValue extends ColGroupCompressed implements Clonea return _dict.getNumberNonZeros(counts, _colIndexes.length); } - private static void matrixMultDictionariesAndOutputToColIndexes(double[] left, double[] right, int[] colsLeft, - int[] colsRight, double[] result, int outCols) { - final int rows = left.length / colsLeft.length; - for(int k = 0; k < rows; k++) { - final int offL = k * colsLeft.length; - final int offR = k * colsRight.length; - for(int i = 0; i < colsLeft.length; i++) { - final int offOut = colsLeft[i] * outCols; - final double vl = left[offL + i]; - if(vl != 0) - for(int j = 0; j < colsRight.length; j++) { - final double vr = right[offR + j]; - result[offOut + colsRight[j]] += vl * vr; - } - } + private static void vectorVectorMultiply(final double[] left, final int[] leftRows, final double[] right, + final int[] rightColumns, final double[] result, final int outCols) { + if(left.length != leftRows.length) { + // LOG.error(Arrays.toString(left)); + // LOG.error(Arrays.toString(right)); + // LOG.error(Arrays.toString(leftRows)); + // LOG.error(Arrays.toString(rightColumns)); + throw new DMLCompressionException( + "Error left length " + left.length + " not equal columns length" + leftRows.length); + } + if(right.length != rightColumns.length) + throw new DMLCompressionException( + "Error right not equal length " + right.length + " " + rightColumns.length); + for(int row = 0; row < leftRows.length; row++) { + final int outputRowOffset = leftRows[row] * outCols; + final double vLeft = left[row]; + for(int col = 0; col < rightColumns.length; col++) + result[outputRowOffset + rightColumns[col]] += vLeft * right[col]; } } - @Override - public int getNumRows() { - return _numRows; + private static boolean logMM = true; + + /** + * Matrix Multiply the two matrices, note that the left side is transposed, + * + * making the multiplication a: t(left) %*% right + * + * @param left The left side matrix, transposed linearized row major + * @param right The right hand side linearized row major + * @param rowsLeft The number of rows and the row indexes on the left hand side + * @param colsRight The number of columns and the column indexes on the right hand side + * @param result The result matrix to put the results into, linearized row major + * @param outCols The output columns count, to know how much to offset into with results. + */ + private static void matrixMultDictionariesAndOutputToColIndexes(double[] left, double[] right, int[] rowsLeft, + int[] colsRight, double[] result, int outCols) { + + try { + final int rows = left.length / rowsLeft.length; + if(rows != right.length / colsRight.length) + throw new DMLCompressionException( + "Not equal number of rows: " + rows + " " + right.length / colsRight.length); + for(int k = 0; k < rows; k++) { + final int offL = k * rowsLeft.length; + final int offR = k * colsRight.length; + // final int offL = k * colsRight.length; + // final int offR = k * rowsLeft.length; + // if(offR < right.length && offL < left.length) + for(int i = 0; i < rowsLeft.length; i++) { + final int offOut = rowsLeft[i] * outCols; + final double vl = left[offL + i]; + if(vl != 0) + for(int j = 0; j < colsRight.length; j++) { + final double vr = right[offR + j]; + result[offOut + colsRight[j]] += vl * vr; + } + } + } + } + catch(Exception e) { + + if(logMM) { + StringBuilder sb = new StringBuilder(); + sb.append("\nLeft (transposed):\n"); + for(int i = 0; i < rowsLeft.length; i++) { + for(int j = i * rowsLeft.length; j < (i + 1) * rowsLeft.length; j++) + sb.append(left[j] + ", "); + sb.append("\n"); + } + LOG.error(sb); + + sb = new StringBuilder(); + sb.append("\nRight:\n"); + for(int i = 0; i < colsRight.length; i++) { + for(int j = i * colsRight.length; j < (i + 1) * colsRight.length; j++) + sb.append(right[j] + ", "); + sb.append("\n"); + } + LOG.error(sb); + logMM = false; + } + throw new DMLCompressionException("MM of pre aggregated colGroups failed", e); + } } @Override diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java index 9b084e8..1aeda85 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/ADictionary.java @@ -164,7 +164,7 @@ public abstract class ADictionary { * * @return the long count of bytes to store the dictionary. */ - public long getExactSizeOnDisk(){ + public long getExactSizeOnDisk() { return 1; } @@ -212,13 +212,15 @@ public abstract class ADictionary { */ public abstract double sumRow(int k, boolean square, int nrColumns); + public abstract double[] colSum(int[] counts, int nCol); + public abstract void colSum(double[] c, int[] counts, int[] colIndexes, boolean square); public abstract double sum(int[] counts, int ncol); public abstract double sumsq(int[] counts, int ncol); - public abstract StringBuilder getString(StringBuilder sb, int colIndexes); + public abstract String getString(int colIndexes); /** * This method adds the max and min values contained in the dictionary to corresponding cells in the ret variable. @@ -252,6 +254,8 @@ public abstract class ADictionary { public abstract long getNumberNonZeros(int[] counts, int nCol); + public abstract long getNumberNonZerosContained(); + /** * Copies and adds the dictionary entry from this dictionary to the d dictionary * @@ -268,4 +272,23 @@ public abstract class ADictionary { else return new Dictionary(((Bitmap) ubm).getValues()); } + + /** + * Get the most common tuple element contained in the dictionary + * + * returns null if that tuple is all zero values. + * + * @param counts The counts of the individual tuples contained, managed by the column group. + * @return a new double array containing the most common value + */ + public abstract double[] getMostCommonTuple(int[] counts, int nCol); + + /** + * Allocate a new dictionary where the tuple given is subtracted from all tuples in the previous dictionary. + * + * @param tuple a double list representing a tuple, it is given that the tuple with is the same as this + * dictionaries. + * @return a new instance of dictionary with the tuple subtracted. + */ + public abstract ADictionary subtractTuple(double[] tuple); } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java index 26d090d..b4f843f 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/Dictionary.java @@ -85,9 +85,8 @@ public class Dictionary extends ADictionary { @Override public double aggregate(double init, Builtin fn) { // full aggregate can disregard tuple boundaries - int len = size(); double ret = init; - for(int i = 0; i < len; i++) + for(int i = 0; i < _values.length; i++) ret = fn.execute(ret, _values[i]); return ret; } @@ -249,12 +248,23 @@ public class Dictionary extends ADictionary { } @Override + public double[] colSum(int[] counts, int nCol) { + final double[] res = new double[nCol]; + int idx = 0; + for(int k = 0; k < _values.length / nCol; k++) { + final int cntk = counts[k]; + for(int j = 0; j < nCol; j++) + res[j] += _values[idx++] * cntk; + } + return res; + } + + @Override public void colSum(double[] c, int[] counts, int[] colIndexes, boolean square) { if(_values == null) return; - for(int k = 0; k < _values.length / colIndexes.length; k++) { - int cntk = counts[k]; + final int cntk = counts[k]; for(int j = 0; j < colIndexes.length; j++) { double v = _values[k * colIndexes.length + j]; if(square) @@ -328,17 +338,22 @@ public class Dictionary extends ADictionary { } } - public StringBuilder getString(StringBuilder sb, int colIndexes) { - sb.append("["); - for(int i = 0; i < _values.length - 1; i++) { - sb.append(_values[i]); - sb.append((i) % (colIndexes) == colIndexes - 1 ? ", " : ": "); - } - if(_values != null && _values.length > 0) { - sb.append(_values[_values.length - 1]); + public String getString(int colIndexes) { + StringBuilder sb = new StringBuilder(); + if(colIndexes == 1) + sb.append(Arrays.toString(_values)); + else { + sb.append("["); + for(int i = 0; i < _values.length - 1; i++) { + sb.append(_values[i]); + sb.append((i) % (colIndexes) == colIndexes - 1 ? "\n: " : ", "); + } + if(_values != null && _values.length > 0) { + sb.append(_values[_values.length - 1]); + } + sb.append("]"); } - sb.append("]"); - return sb; + return sb.toString(); } public ADictionary sliceOutColumnRange(int idxStart, int idxEnd, int previousNumberOfColumns) { @@ -419,4 +434,46 @@ public class Dictionary extends ADictionary { public boolean isLossy() { return false; } + + @Override + public long getNumberNonZerosContained() { + long count = 0; + for(double v : _values) { + if(v != 0.0) + count++; + } + return count; + } + + @Override + public double[] getMostCommonTuple(int[] counts, int nCol) { + int maxIndex = 0; + int maxCount = 0; + for(int i = 0; i < counts.length; i++) { + if(counts[i] > maxCount) { + maxCount = counts[i]; + maxIndex = i; + } + } + final double[] tuple = new double[nCol]; + boolean allZero = true; + for(int i = maxIndex * nCol, off = 0; i < (maxIndex + 1) * nCol && i < _values.length; i++, off++) { + final double v = _values[i]; + if(v != 0) { + tuple[off] = _values[i]; + allZero = false; + } + } + + return allZero ? null : tuple; + } + + @Override + public ADictionary subtractTuple(double[] tuple) { + double[] newValues = new double[_values.length]; + for(int i = 0; i < _values.length; i++) { + newValues[i] = _values[i] - tuple[i % tuple.length]; + } + return new Dictionary(newValues); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java index 524bb32..a8559db 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java +++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/dictionary/QDictionary.java @@ -314,6 +314,20 @@ public class QDictionary extends ADictionary { } } + + @Override + public double[] colSum(int[] counts, int nCol){ + throw new NotImplementedException("Not Implemented"); + // final double[] res = new double[counts.length]; + // int idx = 0; + // for(int k = 0; k< _values.length / counts.length; k++){ + // final int cntk = counts[k]; + // for(int j = 0; j< counts.length; j++){ + // res[j] += _values[idx++] * cntk; + // } + // } + // return res; + } @Override public void colSum(double[] c, int[] counts, int[] colIndexes, boolean square) { throw new NotImplementedException("Not Implemented"); @@ -398,12 +412,13 @@ public class QDictionary extends ADictionary { } } - public StringBuilder getString(StringBuilder sb, int colIndexes) { + public String getString( int colIndexes) { + StringBuilder sb = new StringBuilder(); for(int i = 0; i < size(); i++) { sb.append(_values[i]); sb.append((i) % (colIndexes) == colIndexes - 1 ? "\n" : " "); } - return sb; + return sb.toString(); } public Dictionary makeDoubleDictionary() { @@ -470,4 +485,24 @@ public class QDictionary extends ADictionary { public boolean isLossy() { return false; } + + @Override + public long getNumberNonZerosContained(){ + long count = 0; + for(double v : _values){ + if(v != 0.0) + count++; + } + return count; + } + + @Override + public double[] getMostCommonTuple(int[] counts, int nCol){ + return null; + } + + @Override + public ADictionary subtractTuple(double[] tuple){ + throw new NotImplementedException(); + } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java index 13b2007..f6c734c 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibCompAgg.java @@ -83,7 +83,6 @@ public class CLALibCompAgg { else aggregateUnaryNormalCompressedMatrixBlock(inputMatrix, outputMatrix, op, blen, indexesIn, inCP); } - outputMatrix.recomputeNonZeros(); return outputMatrix; diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java index b505c71..2779a68 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibLeftMultBy.java @@ -35,10 +35,8 @@ import org.apache.sysds.runtime.compress.colgroup.AColGroup; import org.apache.sysds.runtime.compress.colgroup.AColGroup.CompressionType; import org.apache.sysds.runtime.compress.colgroup.ColGroupValue; import org.apache.sysds.runtime.compress.colgroup.pre.IPreAggregate; -import org.apache.sysds.runtime.functionobjects.SwapIndex; import org.apache.sysds.runtime.matrix.data.LibMatrixReorg; import org.apache.sysds.runtime.matrix.data.MatrixBlock; -import org.apache.sysds.runtime.matrix.operators.ReorgOperator; import org.apache.sysds.runtime.util.CommonThreadPool; public class CLALibLeftMultBy { @@ -46,16 +44,18 @@ public class CLALibLeftMultBy { public static MatrixBlock leftMultByMatrixTransposed(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int k) { + if(m2.isEmpty()) + return ret; MatrixBlock transposed = new MatrixBlock(m2.getNumColumns(), m2.getNumRows(), false); LibMatrixReorg.transpose(m2, transposed); ret = leftMultByMatrix(m1, transposed, ret, k); ret.recomputeNonZeros(); return ret; - // return LibMatrixReorg.transpose(ret, new MatrixBlock(ret.getNumColumns(), ret.getNumRows(), false)); } public static MatrixBlock leftMultByMatrixTransposed(CompressedMatrixBlock m1, CompressedMatrixBlock m2, MatrixBlock ret, int k) { + prepareReturnMatrix(m1, m2, ret, true); leftMultByCompressedTransposedMatrix(m1.getColGroups(), m2, ret, k, m1.getNumColumns(), m1.getMaxNumValues(), m1.isOverlapping()); @@ -66,24 +66,14 @@ public class CLALibLeftMultBy { public static MatrixBlock leftMultByMatrix(CompressedMatrixBlock m1, MatrixBlock m2, MatrixBlock ret, int k) { prepareReturnMatrix(m1, m2, ret, false); - ret = leftMultByMatrix(m1.getColGroups(), m2, ret, false, m1.getNumColumns(), m1.isOverlapping(), k, - m1.getMaxNumValues()); + if(m2.isEmpty()) + return ret; + ret = leftMultByMatrix(m1.getColGroups(), m2, ret, k, m1.getNumColumns(), m1.getMaxNumValues(), + m1.isOverlapping()); ret.recomputeNonZeros(); return ret; } - private static MatrixBlock leftMultByMatrix(List<AColGroup> groups, MatrixBlock that, MatrixBlock ret, - boolean doTranspose, int numCols, boolean overlapping, int k, Pair<Integer, int[]> v) { - - if(doTranspose) { - ReorgOperator r_op = new ReorgOperator(SwapIndex.getSwapIndexFnObject(), k); - that = that.reorgOperations(r_op, new MatrixBlock(), 0, 0, 0); - } - - return leftMultByMatrix(groups, that, ret, k, numCols, v, overlapping); - - } - private static MatrixBlock prepareReturnMatrix(MatrixBlock m1, MatrixBlock m2, MatrixBlock ret, boolean doTranspose) { int numRowsOutput = doTranspose ? m2.getNumColumns() : m2.getNumRows(); @@ -95,9 +85,9 @@ public class CLALibLeftMultBy { return ret; } - public static void leftMultByTransposeSelf(CompressedMatrixBlock mb, MatrixBlock result, int k) { + // public static void leftMultByTransposeSelf(CompressedMatrixBlock mb, MatrixBlock result, int k) { - } + // } public static void leftMultByTransposeSelf(List<AColGroup> groups, MatrixBlock result, int k, int numColumns, Pair<Integer, int[]> v, boolean overlapping) { @@ -109,11 +99,18 @@ public class CLALibLeftMultBy { return; } - if(k <= 1 || overlapping) { - if(overlapping) - LOG.warn( - "Inefficient TSMM with overlapping matrix Could be implemented multi-threaded but is not yet."); + if(overlapping) { + LOG.warn("Inefficient TSMM with overlapping matrix could be implemented multi-threaded but is not yet."); leftMultByCompressedTransposedMatrix(groups, groups, result); + + result.recomputeNonZeros(); + return; + } + + if(k <= 1) { + for(int i = 0; i < groups.size(); i++) + leftMultByCompressedTransposedMatrix(groups.get(i), groups, result.getDenseBlockValues(), + result.getNumRows(), result.getNumColumns(), i, groups.size()); } else { try { @@ -132,8 +129,8 @@ public class CLALibLeftMultBy { throw new DMLRuntimeException(e); } } - copyToUpperTriangle(result.getDenseBlockValues(), numColumns); + copyToUpperTriangle(result.getDenseBlockValues(), numColumns); result.recomputeNonZeros(); } @@ -162,9 +159,10 @@ public class CLALibLeftMultBy { List<AColGroup> thatCGs = that.getColGroups(); Pair<Integer, int[]> thatV = that.getMaxNumValues(); - if(k <= 1 || overlapping || that.isOverlapping()){ + if(k <= 1 || overlapping || that.isOverlapping()) { if(overlapping || that.isOverlapping()) - LOG.warn("Inefficient Compressed multiplication with overlapping matrix could be implemented multi-threaded but is not yet."); + LOG.warn( + "Inefficient Compressed multiplication with overlapping matrix could be implemented multi-threaded but is not yet."); leftMultByCompressedTransposedMatrix(colGroups, thatCGs, ret); } else @@ -220,6 +218,7 @@ public class CLALibLeftMultBy { } catch(Exception e) { + e.printStackTrace(); throw new DMLRuntimeException(e); } return null; @@ -229,7 +228,6 @@ public class CLALibLeftMultBy { private static void leftMultByCompressedTransposedMatrix(List<AColGroup> thisCG, List<AColGroup> thatCG, MatrixBlock ret) { double[] c = ret.getDenseBlockValues(); - for(AColGroup lhs : thatCG) { leftMultByCompressedTransposedMatrix(lhs, thisCG, c, ret.getNumRows(), ret.getNumColumns(), 0, thisCG.size()); @@ -256,16 +254,9 @@ public class CLALibLeftMultBy { ret.allocateDenseBlock(); double[] retV = ret.getDenseBlockValues(); - // for(int b = 0; b < db.numBlocks(); b++) { - // int blockSize = db.blockSize(b); - // blockU = Math.min(blockL + blockSize, ret.getNumRows()); - - if(k == 1) { - LOG.trace("Single treaded left matrix multiplication"); - for(int j = 0; j < colGroups.size(); j++) { + if(k == 1) + for(int j = 0; j < colGroups.size(); j++) colGroups.get(j).leftMultByMatrix(that, retV, numColumns); - } - } else { try { ExecutorService pool = CommonThreadPool.get(k); diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java index 5417470..5b0a105 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibRightMultBy.java @@ -20,7 +20,9 @@ package org.apache.sysds.runtime.compress.lib; import java.util.ArrayList; +import java.util.HashSet; import java.util.List; +import java.util.Set; import java.util.concurrent.Callable; import java.util.concurrent.ExecutionException; import java.util.concurrent.ExecutorService; @@ -32,7 +34,7 @@ import org.apache.commons.logging.LogFactory; import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.colgroup.AColGroup; -import org.apache.sysds.runtime.compress.colgroup.ColGroupUncompressed; +import org.apache.sysds.runtime.compress.colgroup.ColGroupEmpty; import org.apache.sysds.runtime.matrix.data.MatrixBlock; import org.apache.sysds.runtime.util.CommonThreadPool; @@ -55,34 +57,18 @@ public class CLALibRightMultBy { that = that instanceof CompressedMatrixBlock ? ((CompressedMatrixBlock) that).decompress(k) : that; MatrixBlock m = rightMultByMatrixOverlapping(colGroups, that, ret, k, v); + if(m instanceof CompressedMatrixBlock) if(allowOverlappingOutput(colGroups, allowOverlap)) return m; - else { - CompressedMatrixBlock outBlock = (CompressedMatrixBlock) m; - ColGroupUncompressed uccg = findUncompressedColGroup(outBlock.getColGroups()); - if(uccg == null) - return outBlock.decompress(k); - else{ - MatrixBlock outputBlock = uccg.getData(); - outputBlock.sparseToDense(); - return outBlock.decompress(outputBlock, k); - } - } + else + return ((CompressedMatrixBlock) m).decompress(k); else return m; } - private static ColGroupUncompressed findUncompressedColGroup(List<AColGroup> colGroups){ - for(AColGroup g: colGroups){ - if(g instanceof ColGroupUncompressed) - return (ColGroupUncompressed) g; - } - return null; - } - private static boolean allowOverlappingOutput(List<AColGroup> colGroups, boolean allowOverlap) { - + if(!allowOverlap) { LOG.debug("Not Overlapping because it is not allowed"); return false; @@ -91,18 +77,18 @@ public class CLALibRightMultBy { return true; // int distinctCount = 0; // for(AColGroup g : colGroups) { - // if(g instanceof ColGroupCompressed) - // distinctCount += ((ColGroupCompressed) g).getNumValues(); - // else { - // LOG.debug("Not Overlapping because there is an un-compressed column group"); - // return false; - // } + // if(g instanceof ColGroupCompressed) + // distinctCount += ((ColGroupCompressed) g).getNumValues(); + // else { + // LOG.debug("Not Overlapping because there is an un-compressed column group"); + // return false; + // } // } // final int threshold = colGroups.get(0).getNumRows() / 2; // boolean allow = distinctCount <= threshold; // if(LOG.isDebugEnabled() && !allow) - // LOG.debug("Not Allowing Overlap because of number of distinct items in compression: " + distinctCount - // + " is greater than threshold: " + threshold); + // LOG.debug("Not Allowing Overlap because of number of distinct items in compression: " + distinctCount + // + " is greater than threshold: " + threshold); // return allow; } @@ -122,9 +108,15 @@ public class CLALibRightMultBy { CompressedMatrixBlock ret, int k, Pair<Integer, int[]> v) { List<AColGroup> retCg = new ArrayList<>(); + boolean containsNull = false; if(k == 1) { - for(AColGroup g : colGroups) - retCg.add(g.rightMultByMatrix(that)); + for(AColGroup g : colGroups) { + AColGroup retG = g.rightMultByMatrix(that); + if(retG != null) + retCg.add(retG); + else + containsNull = true; + } } else { ExecutorService pool = CommonThreadPool.get(k); @@ -132,10 +124,12 @@ public class CLALibRightMultBy { List<Callable<AColGroup>> tasks = new ArrayList<>(colGroups.size()); for(AColGroup g : colGroups) tasks.add(new RightMatrixMultTask(g, that)); - for(Future<AColGroup> fg : pool.invokeAll(tasks)){ + for(Future<AColGroup> fg : pool.invokeAll(tasks)) { AColGroup g = fg.get(); if(g != null) retCg.add(g); + else + containsNull = true; } } catch(InterruptedException | ExecutionException e) { @@ -145,9 +139,32 @@ public class CLALibRightMultBy { ret.allocateColGroupList(retCg); if(retCg.size() > 1) ret.setOverlapping(true); + + if(containsNull) { + ColGroupEmpty cge = findEmptyColumnsAndMakeEmptyColGroup(retCg, ret.getNumColumns()); + if(cge != null) + retCg.add(cge); + } return ret; } + private static ColGroupEmpty findEmptyColumnsAndMakeEmptyColGroup(List<AColGroup> colGroups, int nCols) { + Set<Integer> emptyColumns = new HashSet<Integer>(nCols); + for(int i = 0; i < nCols; i++) + emptyColumns.add(i); + + for(AColGroup g : colGroups) + for(int c : g.getColIndices()) + emptyColumns.remove(c); + + if(emptyColumns.size() != 0) { + int[] emptyColumnsFinal = emptyColumns.stream().mapToInt(Integer::intValue).toArray(); + return new ColGroupEmpty(emptyColumnsFinal, colGroups.get(0).getNumRows()); + } + else + return null; + } + private static class RightMatrixMultTask implements Callable<AColGroup> { private final AColGroup _colGroup; private final MatrixBlock _b; @@ -163,7 +180,6 @@ public class CLALibRightMultBy { return _colGroup.rightMultByMatrix(_b); } catch(Exception e) { - LOG.error(e); throw new DMLRuntimeException(e); } } diff --git a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java index 11e0b8b..e3ef846 100644 --- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java +++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java @@ -33,6 +33,7 @@ import org.apache.sysds.hops.OptimizerUtils; import org.apache.sysds.runtime.DMLRuntimeException; import org.apache.sysds.runtime.compress.CompressedMatrixBlock; import org.apache.sysds.runtime.compress.colgroup.AColGroup; +import org.apache.sysds.runtime.compress.colgroup.ColGroupCompressed; import org.apache.sysds.runtime.compress.colgroup.ColGroupConst; import org.apache.sysds.runtime.compress.colgroup.ColGroupOLE; import org.apache.sysds.runtime.compress.colgroup.ColGroupUncompressed; @@ -180,7 +181,7 @@ public class CLALibScalar { tasks.add(new ScalarTask(uc, sop)); } else { - int nv = ((ColGroupValue) grp).getNumValues() * grp.getColIndices().length; + int nv = ((ColGroupCompressed) grp).getNumValues() * grp.getColIndices().length; if(nv < MINIMUM_PARALLEL_SIZE && !(grp instanceof ColGroupOLE)) { small.add(grp); } diff --git a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixValue.java b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixValue.java index c9838d5..98a5536 100644 --- a/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixValue.java +++ b/src/main/java/org/apache/sysds/runtime/matrix/data/MatrixValue.java @@ -97,7 +97,7 @@ public abstract class MatrixValue implements WritableComparable public abstract void reset(int rl, int cl, double v); /** - * Copy this MatrixValue into that MatrixValue. + * Copy that MatrixValue into this MatrixValue. * * If the MatrixValue is a MatrixBlock evaluate the sparsity of the original matrix, * and copy into either a sparse or a dense matrix. @@ -107,7 +107,7 @@ public abstract class MatrixValue implements WritableComparable public abstract void copy(MatrixValue that); /** - * Copy this MatrixValue into that MatrixValue. But select sparse destination block depending on boolean parameter. + * Copy that MatrixValue into this MatrixValue. But select sparse destination block depending on boolean parameter. * * @param that object to copy the values into. * @param sp boolean specifying if output should be forced sparse or dense. (only applicable if the 'that' is a MatrixBlock) diff --git a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java index a93edd2..1167512 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java +++ b/src/test/java/org/apache/sysds/test/component/compress/CompressedTestBase.java @@ -85,7 +85,7 @@ public abstract class CompressedTestBase extends TestBase { protected static OverLapping[] overLapping = new OverLapping[] { // OverLapping.COL, - OverLapping.PLUS, OverLapping.MATRIX, OverLapping.NONE, + OverLapping.PLUS, OverLapping.MATRIX, OverLapping.NONE, OverLapping.APPEND_EMPTY, OverLapping.APPEND_CONST, // OverLapping.MATRIX_PLUS, // OverLapping.SQUASH, // OverLapping.MATRIX_MULT_NEGATIVE @@ -217,11 +217,23 @@ public abstract class CompressedTestBase extends TestBase { lossyTolerance = lossyTolerance * 160; cols = 2; break; + case APPEND_EMPTY: + tmp = new MatrixBlock(rows, 1, 0); + break; + case APPEND_CONST: + tmp = new MatrixBlock(rows, 1, 0) + .scalarOperations(new RightScalarOperator(Plus.getPlusFnObject(), 1), new MatrixBlock()); + break; default: break; } if(cmb instanceof CompressedMatrixBlock) { - if(tmp != null) { + if(tmp != null && ov == OverLapping.APPEND_EMPTY || ov == OverLapping.APPEND_CONST) { + mb = mb.append(tmp, new MatrixBlock()); + cmb = cmb.append(tmp, new MatrixBlock()); + cols += tmp.getNumColumns(); + } + else if(tmp != null) { // Make Operator AggregateBinaryOperator abop = InstructionUtils.getMatMultOperator(_k); @@ -337,10 +349,10 @@ public abstract class CompressedTestBase extends TestBase { return; // Input was not compressed then just pass test MatrixBlock vector1 = DataConverter - .convertToMatrixBlock(TestUtils.generateTestMatrix(cols, 1, 0.9, 1.1, 1.0, 3)); + .convertToMatrixBlock(TestUtils.generateTestMatrix(cols, 1, 0.9, 1.5, 1.0, 3)); MatrixBlock vector2 = (ctype == ChainType.XtwXv) ? DataConverter - .convertToMatrixBlock(TestUtils.generateTestMatrix(rows, 1, 0.9, 1.1, 1.0, 3)) : null; + .convertToMatrixBlock(TestUtils.generateTestMatrix(rows, 1, 0.9, 1.5, 1.0, 3)) : null; // matrix-vector uncompressed MatrixBlock ret1 = mb.chainMatrixMultOperations(vector1, vector2, new MatrixBlock(), ctype, _k); @@ -681,7 +693,6 @@ public abstract class CompressedTestBase extends TestBase { try { if(!(cmb instanceof CompressedMatrixBlock)) return; // Input was not compressed then just pass test - // ChainType ctype = ChainType.XtwXv; for(MMTSJType mType : new MMTSJType[] {MMTSJType.LEFT, // MMTSJType.RIGHT }) { @@ -691,8 +702,6 @@ public abstract class CompressedTestBase extends TestBase { // matrix-vector compressed MatrixBlock ret2 = cmb.transposeSelfMatrixMultOperations(new MatrixBlock(), mType, _k); - // LOG.error(ret1); - // LOG.error(ret2); // compare result with input compareResultMatrices(ret1, ret2, 100); } diff --git a/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java b/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java index 0fcff40..c936c1b 100644 --- a/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java +++ b/src/test/java/org/apache/sysds/test/component/compress/TestConstants.java @@ -57,7 +57,7 @@ public class TestConstants { } public enum OverLapping { - COL, MATRIX, NONE, MATRIX_PLUS, MATRIX_MULT_NEGATIVE, SQUASH, PLUS; + COL, MATRIX, NONE, MATRIX_PLUS, MATRIX_MULT_NEGATIVE, SQUASH, PLUS, APPEND_EMPTY, APPEND_CONST; public static boolean effectOnOutput(OverLapping opcode) { switch(opcode) {
