[SYSTEMML-810] New compressed matrix blocks and operations, tests For details, see https://issues.apache.org/jira/browse/SYSTEMML-449.
Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/16e7b1c8 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/16e7b1c8 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/16e7b1c8 Branch: refs/heads/master Commit: 16e7b1c88e45e007d1db229717311fcf70bc6b19 Parents: 71013e7 Author: Matthias Boehm <[email protected]> Authored: Sat Jul 16 00:43:16 2016 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sat Jul 16 17:22:50 2016 -0700 ---------------------------------------------------------------------- .../runtime/compress/BitmapDecoderOLE.java | 127 ++ .../runtime/compress/BitmapDecoderRLE.java | 117 ++ .../sysml/runtime/compress/BitmapEncoder.java | 392 +++++ .../apache/sysml/runtime/compress/ColGroup.java | 259 ++++ .../sysml/runtime/compress/ColGroupBitmap.java | 498 +++++++ .../sysml/runtime/compress/ColGroupOLE.java | 634 +++++++++ .../sysml/runtime/compress/ColGroupRLE.java | 617 ++++++++ .../runtime/compress/ColGroupUncompressed.java | 360 +++++ .../runtime/compress/CompressedMatrixBlock.java | 1342 ++++++++++++++++++ .../runtime/compress/PlanningBinPacker.java | 191 +++ .../sysml/runtime/compress/PlanningCoCoder.java | 227 +++ .../runtime/compress/PlanningCoCodingGroup.java | 104 ++ .../compress/PlanningGroupMergeAction.java | 73 + .../runtime/compress/ReaderColumnSelection.java | 64 + .../compress/ReaderColumnSelectionDense.java | 68 + .../ReaderColumnSelectionDenseSample.java | 86 ++ .../compress/ReaderColumnSelectionSparse.java | 115 ++ .../runtime/compress/UncompressedBitmap.java | 101 ++ .../compress/estim/CompressedSizeEstimator.java | 145 ++ .../estim/CompressedSizeEstimatorExact.java | 53 + .../estim/CompressedSizeEstimatorSample.java | 767 ++++++++++ .../compress/estim/CompressedSizeInfo.java | 69 + .../compress/estim/SizeEstimatorFactory.java | 40 + .../runtime/compress/utils/ConverterUtils.java | 99 ++ .../sysml/runtime/compress/utils/DblArray.java | 91 ++ .../compress/utils/DblArrayIntListHashMap.java | 179 +++ .../compress/utils/DoubleIntListHashMap.java | 181 +++ .../runtime/compress/utils/IntArrayList.java | 102 ++ .../compress/utils/LinearAlgebraUtils.java | 383 +++++ .../runtime/functionobjects/KahanFunction.java | 8 + .../runtime/functionobjects/KahanPlus.java | 5 + .../runtime/functionobjects/KahanPlusSq.java | 15 +- .../runtime/matrix/data/LibMatrixMult.java | 27 +- .../sysml/runtime/matrix/data/MatrixBlock.java | 2 +- .../compress/BasicCompressionTest.java | 168 +++ .../compress/BasicMatrixAppendTest.java | 176 +++ .../compress/BasicMatrixMultChainTest.java | 245 ++++ .../BasicMatrixTransposeSelfMultTest.java | 172 +++ .../compress/BasicMatrixVectorMultTest.java | 180 +++ .../BasicScalarOperationsSparseUnsafeTest.java | 177 +++ .../compress/BasicScalarOperationsTest.java | 177 +++ .../BasicTransposeSelfLeftMatrixMultTest.java | 172 +++ .../compress/BasicUnaryAggregateTest.java | 544 +++++++ .../compress/BasicVectorMatrixMultTest.java | 180 +++ .../functions/compress/CompressedLinregCG.java | 151 ++ .../compress/CompressedSerializationTest.java | 185 +++ .../compress/LargeCompressionTest.java | 169 +++ .../compress/LargeMatrixVectorMultTest.java | 180 +++ .../compress/LargeParMatrixVectorMultTest.java | 182 +++ .../compress/LargeVectorMatrixMultTest.java | 180 +++ .../compress/ParMatrixMultChainTest.java | 247 ++++ .../compress/ParMatrixVectorMultTest.java | 182 +++ .../ParTransposeSelfLeftMatrixMultTest.java | 174 +++ .../compress/ParUnaryAggregateTest.java | 547 +++++++ .../compress/ParVectorMatrixMultTest.java | 181 +++ src/test/scripts/functions/compress/LinregCG.R | 57 + .../scripts/functions/compress/LinregCG.dml | 56 + .../compress/SystemML-config-compress.xml | 59 + .../functions/compress/ZPackageSuite.java | 56 + 59 files changed, 12322 insertions(+), 16 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java new file mode 100644 index 0000000..fc6e861 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java @@ -0,0 +1,127 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.Arrays; +import java.util.Iterator; + +/** + * General-purpose iterator to decode a compressed OLE bitmap. + * + */ +public final class BitmapDecoderOLE implements Iterator<Integer> +{ + // pointer to the compressed bitmap + private int _bmOff; + private int _bmLen; + private char[] _bmPtr; + + // The index of the current block. Block 0 covers bits 1 through 2^16 + private int _blockIx; + + // The offset where the current block starts within the bitmap. + private int _blockStartOffset; + + // The number of offsets in the current block. + private int _curBlockSize; + + // The offset <b>in the current block</b> the <b>next</b> element we will + // read from the bitmap, or bmPtr.length if we are done. + private int _nextBmOffset; + + /** + * Point this object at the beginning of a particular bitmap. After a call + * to this method, the next call to {@link #nextOffset()} will return the + * offset of the first bit in the specified bitmap. + * + * @param bmPtr + * pointer to a compressed bitmap + */ + public BitmapDecoderOLE(char[] bmPtr, int off, int len) { + _bmOff = off; + _bmLen = len; + _bmPtr = bmPtr; + _blockIx = 0; + _blockStartOffset = 0; + _curBlockSize = _bmPtr[_bmOff+_blockStartOffset]; + if (_curBlockSize < 0) { + throw new RuntimeException(String.format( + "Negative block size %d at position %d of %s", + _curBlockSize, _blockStartOffset, Arrays.toString(bmPtr))); + } + _nextBmOffset = 0; + + // Advance past any zero-length blocks at the beginning of the array + while (_blockStartOffset < _bmLen + && _nextBmOffset >= _curBlockSize) { + advanceToNextBlock(); + } + } + + @Override + public Integer next() { + if( !hasNext() ) + throw new RuntimeException("No next offset existing."); + + // Grab the lookahead value Note the +1 in the array indexing; + // the first number in a block is the block size + int offsetFromBlockBegin = _bmPtr[_bmOff+_blockStartOffset + 1 + _nextBmOffset]; + int ret = (_blockIx * BitmapEncoder.BITMAP_BLOCK_SZ) + + offsetFromBlockBegin; + _nextBmOffset++; + + // Advance to next non-empty block if we reached the end of the block. + while (_blockStartOffset < _bmLen && _nextBmOffset >= _curBlockSize) { + advanceToNextBlock(); + } + + return ret; + } + + @Override + public boolean hasNext() { + return _blockStartOffset < _bmLen; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented for BitmapDecoderOLE."); + } + + /** + * Move forward to the next block. Does not skip empty blocks. + */ + private void advanceToNextBlock() { + _blockStartOffset += (1 + _curBlockSize); + _blockIx++; + if (_blockStartOffset >= _bmLen) { + // Read past last block + return; + } + + _curBlockSize = _bmPtr[_bmOff+_blockStartOffset]; + if (_curBlockSize < 0) { + throw new RuntimeException(String.format( + "Negative block size %d at position %d of %s", + _curBlockSize, _blockStartOffset, Arrays.toString(_bmPtr))); + } + _nextBmOffset = 0; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java new file mode 100644 index 0000000..54f24ae --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java @@ -0,0 +1,117 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.Iterator; + +/** + * General-purpose iterator to decode a compressed OLE bitmap. + * + */ +public final class BitmapDecoderRLE implements Iterator<Integer> +{ + // pointer to the compressed bitmap + private int _bmOff; + private int _bmLen; + private char[] _bmPtr; + + // The offset of the <b>next</b> element we will read from the bitmap, or + // bmPtr.length if we are done. + private int _nextBmOffset; + + // The offset in the matrix column of the beginning of the current run + private int _runStartOffset; + + // The length of the current run + private int _curRunLen; + + // The number of bits that we have returned from the current run. + private int _runBitsReturned; + + /** + * Point this object at the beginning of a particular bitmap. After a call + * to this method, the next call to {@link #nextOffset()} will return the + * offset of the first bit in the specified bitmap. + * + * @param bmPtr + * pointer to a compressed bitmap + */ + public BitmapDecoderRLE(char[] bmPtr, int off, int len) { + _bmOff = off; + _bmLen = len; + _bmPtr = bmPtr; + _nextBmOffset = 0; + _runStartOffset = 0; + _curRunLen = 0; + _runBitsReturned = 0; + + if (0 == _bmLen) { + return; //no runs + } + + // Advance to the beginning of the first non-empty run. + advanceToNextRun(); + } + + @Override + public Integer next() { + if( !hasNext() ) + throw new RuntimeException("No next offset existing."); + + // Grab the lookahead value + int ret = _runStartOffset + _runBitsReturned; + + _runBitsReturned++; + + // Check for end of run + if (_runBitsReturned == _curRunLen) { + advanceToNextRun(); + } + + return ret; + } + + @Override + public boolean hasNext() { + return _runBitsReturned < _curRunLen; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented for BitmapDecoderRLE."); + } + + /** Move forward to the next non-empty run. */ + private void advanceToNextRun() { + // While loop needed because some runs are of length 0 + while (_runBitsReturned == _curRunLen && _nextBmOffset < _bmLen) { + + _runBitsReturned = 0; + + // Read the distance to the next run + char delta = _bmPtr[_bmOff + _nextBmOffset]; + + // Run length is stored in the next element of the array + _runStartOffset += delta + _curRunLen; + _curRunLen = _bmPtr[_bmOff + _nextBmOffset + 1]; + _nextBmOffset += 2; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java new file mode 100644 index 0000000..8a535e1 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java @@ -0,0 +1,392 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.ArrayList; +import java.util.Arrays; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.compress.utils.DblArray; +import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap; +import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap; +import org.apache.sysml.runtime.compress.utils.IntArrayList; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.SparseBlock; + + +/** + * Static functions for encoding bitmaps in various ways. + * + */ +public class BitmapEncoder +{ + /** Size of the blocks used in a blocked bitmap representation. */ + public static final int BITMAP_BLOCK_SZ = 65536; + + /** + * Generate uncompressed bitmaps for a set of columns in an uncompressed + * matrix block. + * + * @param colIndices + * indexes (within the block) of the columns to extract + * @param rawblock + * an uncompressed matrix block; can be dense or sparse + * @return uncompressed bitmap representation of the columns + * @throws DMLRuntimeException + */ + public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock) + { + //note: no sparse column selection reader because low potential + //single column selection + if( colIndices.length==1 ) { + return extractBitmap(colIndices[0], rawblock, + !CompressedMatrixBlock.MATERIALIZE_ZEROS); + } + //multiple column selection (general case) + else { + ReaderColumnSelection reader = null; + if( rawblock.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT ) + reader = new ReaderColumnSelectionSparse(rawblock, colIndices, + !CompressedMatrixBlock.MATERIALIZE_ZEROS); + else + reader = new ReaderColumnSelectionDense(rawblock, colIndices, + !CompressedMatrixBlock.MATERIALIZE_ZEROS); + + return extractBitmap(colIndices, rawblock, reader); + } + } + + /** + * + * @param colIndices + * @param rawblock + * @param sampleIndexes + * @return + */ + public static UncompressedBitmap extractBitmapFromSample(int[] colIndices, + MatrixBlock rawblock, int[] sampleIndexes) + { + //note: no sparse column selection reader because low potential + + //single column selection + if( colIndices.length==1 ) { + return extractBitmap(colIndices[0], rawblock, sampleIndexes, + !CompressedMatrixBlock.MATERIALIZE_ZEROS); + } + //multiple column selection (general case) + else { + return extractBitmap(colIndices, rawblock, + new ReaderColumnSelectionDenseSample(rawblock, colIndices, + sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS)); + } + } + + /** + * Encodes the bitmap as a series of run lengths and offsets. + * <p> + * <b>NOTE: This method must be kept in sync with {@link BitmapDecoderRLE} + * !</b> + * + * @param offsets + * uncompressed contents of the bitmap, expressed as a list of + * the offsets of different bits + * @return compressed version of said bitmap + */ + public static char[] genRLEBitmap(int[] offsets) { + if( offsets.length == 0 ) + return new char[0]; //empty list + + // Use an ArrayList for correctness at the expense of temp space + ArrayList<Character> buf = new ArrayList<Character>(); + + // 1 + (position of last 1 in the previous run of 1's) + // We add 1 because runs may be of length zero. + int lastRunEnd = 0; + + // Offset between the end of the previous run of 1's and the first 1 in + // the current run. Initialized below. + int curRunOff; + + // Length of the most recent run of 1's + int curRunLen = 0; + + // Current encoding is as follows: + // Negative entry: abs(Entry) encodes the offset to the next lone 1 bit. + // Positive entry: Entry encodes offset to next run of 1's. The next + // entry in the bitmap holds a run length. + + // Special-case the first run to simplify the loop below. + int firstOff = offsets[0]; + + // The first run may start more than a short's worth of bits in + while (firstOff > Character.MAX_VALUE) { + buf.add(Character.MAX_VALUE); + buf.add((char) 0); + firstOff -= Character.MAX_VALUE; + lastRunEnd += Character.MAX_VALUE; + } + + // Create the first run with an initial size of 1 + curRunOff = firstOff; + curRunLen = 1; + + // Process the remaining offsets + for (int i = 1; i < offsets.length; i++) { + + int absOffset = offsets[i]; + + // 1 + (last position in run) + int curRunEnd = lastRunEnd + curRunOff + curRunLen; + + if (absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) { + // End of a run, either because we hit a run of 0's or because the + // number of 1's won't fit in 16 bits. Add run to bitmap and start a new one. + buf.add((char) curRunOff); + buf.add((char) curRunLen); + + lastRunEnd = curRunEnd; + curRunOff = absOffset - lastRunEnd; + + while (curRunOff > Character.MAX_VALUE) { + // SPECIAL CASE: Offset to next run doesn't fit into 16 bits. + // Add zero-length runs until the offset is small enough. + buf.add(Character.MAX_VALUE); + buf.add((char) 0); + lastRunEnd += Character.MAX_VALUE; + curRunOff -= Character.MAX_VALUE; + } + + curRunLen = 1; + } else { + // Middle of a run + curRunLen++; + } + } + + // Close out the last run + if (curRunLen >= 1) { + buf.add((char) curRunOff); + buf.add((char) curRunLen); + } + + // Convert wasteful ArrayList to packed array. + char[] ret = new char[buf.size()]; + for (int i = 0; i < buf.size(); i++) { + ret[i] = buf.get(i); + } + return ret; + } + + /** + * Encodes the bitmap in blocks of offsets. Within each block, the bits are + * stored as absolute offsets from the start of the block. + * + * @param offsets + * uncompressed contents of the bitmap, expressed as a list of + * the offsets of different bits + * @return compressed version of said bitmap + */ + public static char[] genOffsetBitmap(int[] offsets) + { + int lastOffset = offsets[offsets.length - 1]; + + // Build up the blocks + int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1; + // To simplify the logic, we make two passes. + // The first pass divides the offsets by block. + int[] blockLengths = new int[numBlocks]; + Arrays.fill(blockLengths, 0); + + for (int ix = 0; ix < offsets.length; ix++) { + int val = offsets[ix]; + int blockForVal = val / BITMAP_BLOCK_SZ; + + blockLengths[blockForVal]++; + } + + // The second pass creates the blocks. + int totalSize = numBlocks; + for (int block = 0; block < numBlocks; block++) { + totalSize += blockLengths[block]; + } + char[] encodedBlocks = new char[totalSize]; + + int inputIx = 0; + int blockStartIx = 0; + for (int block = 0; block < numBlocks; block++) { + int blockSz = blockLengths[block]; + + // First entry in the block is number of bits + encodedBlocks[blockStartIx] = (char) blockSz; + + for (int i = 0; i < blockSz; i++) { + encodedBlocks[blockStartIx + i + 1] = (char) + (offsets[inputIx+i] % BITMAP_BLOCK_SZ); + } + + inputIx += blockSz; + blockStartIx += blockSz + 1; + } + + return encodedBlocks; + } + + /** + * + * @param colIndex + * @param rawblock + * @return + * @throws DMLRuntimeException + */ + private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros) + { + //probe map for distinct items (for value or value groups) + DoubleIntListHashMap distinctVals = new DoubleIntListHashMap(); + + //scan rows and probe/build distinct items + final int m = CompressedMatrixBlock.TRANSPOSE_INPUT ? + rawblock.getNumColumns():rawblock.getNumRows(); + + if( rawblock.isInSparseFormat() //SPARSE + && CompressedMatrixBlock.TRANSPOSE_INPUT ) + { + SparseBlock a = rawblock.getSparseBlock(); + if( a != null && !a.isEmpty(colIndex) ) + { + int apos = a.pos(colIndex); + int alen = a.size(colIndex); + int[] aix = a.indexes(colIndex); + double[] avals = a.values(colIndex); + + IntArrayList lstPtr0 = new IntArrayList(); //for 0 values + int last = -1; + //iterate over non-zero entries but fill in zeros + for( int j=apos; j<apos+alen; j++ ) + { + //fill in zero values + if( !skipZeros ) + for( int k=last+1; k<aix[j]; k++ ) + lstPtr0.appendValue(k); + //handle non-zero value + IntArrayList lstPtr = distinctVals.get(avals[j]); + if( lstPtr == null ) { + lstPtr = new IntArrayList(); + distinctVals.appendValue(avals[j], lstPtr); + } + lstPtr.appendValue(aix[j]); + last = aix[j]; + } + //fill in remaining zero values + if( !skipZeros ) { + for( int k=last+1; k<m; k++ ) + lstPtr0.appendValue(k); + if( lstPtr0.size()>0 ) + distinctVals.appendValue(0, lstPtr0); + } + } + else if( !skipZeros ) { //full 0 column + IntArrayList lstPtr = new IntArrayList(); + for( int i=0; i<m; i++ ) + lstPtr.appendValue(i); + distinctVals.appendValue(0, lstPtr); + } + } + else //GENERAL CASE + { + for( int i=0; i<m; i++ ) { + double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? + rawblock.quickGetValue(colIndex, i): + rawblock.quickGetValue(i, colIndex); + if( val!=0 || !skipZeros ) { + IntArrayList lstPtr = distinctVals.get(val); + if( lstPtr == null ) { + lstPtr = new IntArrayList(); + distinctVals.appendValue(val, lstPtr); + } + lstPtr.appendValue(i); + } + } + } + + return new UncompressedBitmap(distinctVals); + } + + /** + * + * @param colIndex + * @param rawblock + * @param sampleIndexes + * @return + */ + private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, int[] sampleIndexes, boolean skipZeros) + { + //note: general case only because anyway binary search for small samples + + //probe map for distinct items (for value or value groups) + DoubleIntListHashMap distinctVals = new DoubleIntListHashMap(); + + //scan rows and probe/build distinct items + final int m = sampleIndexes.length; + for( int i=0; i<m; i++ ) { + int rowIndex = sampleIndexes[i]; + double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? + rawblock.quickGetValue(colIndex, rowIndex) : + rawblock.quickGetValue(rowIndex, colIndex); + if( val!=0 || !skipZeros ) { + IntArrayList lstPtr = distinctVals.get(val); + if( lstPtr == null ) { + lstPtr = new IntArrayList(); + distinctVals.appendValue(val, lstPtr); + } + lstPtr.appendValue(i); + } + } + + return new UncompressedBitmap(distinctVals); + } + + /** + * + * @param colIndices + * @param rawblock + * @param rowReader + * @return + */ + private static UncompressedBitmap extractBitmap(int[] colIndices, + MatrixBlock rawblock, ReaderColumnSelection rowReader) + { + //probe map for distinct items (for value or value groups) + DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap(); + + //scan rows and probe/build distinct items + DblArray cellVals = null; + while ((cellVals = rowReader.nextRow()) != null) { + IntArrayList lstPtr = distinctVals.get(cellVals); + if (lstPtr == null) { + //create new objects only on demand + lstPtr = new IntArrayList(); + distinctVals.appendValue(new DblArray(cellVals), lstPtr); + } + lstPtr.appendValue(rowReader.getCurrentRowIndex()); + } + + return new UncompressedBitmap(distinctVals, colIndices.length); + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java new file mode 100644 index 0000000..647a701 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java @@ -0,0 +1,259 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.Serializable; +import java.util.List; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator; +import org.apache.sysml.runtime.matrix.operators.ScalarOperator; + +/** + * Class that stores information about a column group within a compressed matrix + * block. There are subclasses specific to each compression type. + * + */ +public abstract class ColGroup implements Serializable +{ + private static final long serialVersionUID = 2439785418908671481L; + + public enum CompressionType { + UNCOMPRESSED, //uncompressed sparse/dense + RLE_BITMAP, //RLE bitmap + OLE_BITMAP; //OLE bitmap + } + + /** + * Offsets of the columns that make up the column group. Zero-based, and + * relative to the matrix block. + */ + protected int[] _colIndexes; + + /** Number of rows in the matrix, for use by child classes. */ + protected int _numRows; + + /** How the elements of the column group are compressed. */ + private CompressionType _compType; + + + /** + * Main constructor. + * + * @param colIndices + * offsets of the columns in the matrix block that make up the + * group + * @param numRows + * total number of rows in the parent block + */ + protected ColGroup(CompressionType type, int[] colIndices, int numRows) { + _compType = type; + _colIndexes = colIndices; + _numRows = numRows; + } + + /** Convenience constructor for converting indices to a more compact format. */ + protected ColGroup(CompressionType type, List<Integer> colIndicesList, int numRows) { + _compType = type; + _colIndexes = new int[colIndicesList.size()]; + int i = 0; + for (Integer index : colIndicesList) + _colIndexes[i++] = index; + } + + /** + * @return offsets of the columns in the matrix block that make up the group + */ + public int[] getColIndices() { + return _colIndexes; + } + + /** + * @param col + * an index from 0 to the number of columns in this group - 1 + * @return offset of the specified column in the matrix block that make up + * the group + */ + public int getColIndex(int colNum) { + return _colIndexes[colNum]; + } + + public int getNumRows() { + return _numRows; + } + + /** + * @return number of columns in this column group + */ + public int getNumCols() { + return _colIndexes.length; + } + + /** + * @return How the elements of the column group are compressed. + */ + public CompressionType getCompType() { + return _compType; + } + + /** + * + * @param offset + */ + public void shiftColIndices(int offset) { + for( int i=0; i<_colIndexes.length; i++ ) + _colIndexes[i] += offset; + } + + /** + * Note: Must be overridden by child classes to account for additional data + * and metadata + * + * @return an upper bound on the number of bytes used to store this ColGroup + * in memory. + */ + public long estimateInMemorySize() { + // int numRows (4B) , array reference colIndices (8B) + array object + // overhead if exists (32B) + 4B per element, CompressionType compType + // (2 booleans 2B + enum overhead 32B + reference to enum 8B) + long size = 54; + if (_colIndexes == null) + return size; + else + return size + 32 + 4 * _colIndexes.length; + } + + /** + * Decompress the contents of this column group into the specified full + * matrix block. + * + * @param target + * a matrix block where the columns covered by this column group + * have not yet been filled in. + */ + public abstract void decompressToBlock(MatrixBlock target); + + /** + * Decompress the contents of this column group into uncompressed packed + * columns + * + * @param target + * a dense matrix block. The block must have enough space to hold + * the contents of this column group. + * @param colIndexTargets + * array that maps column indices in the original matrix block to + * columns of target. + */ + public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets); + + /** + * + * @param target dense output vector + * @param colpos column to decompress, error if larger or equal numCols + */ + public abstract void decompressToBlock(MatrixBlock target, int colpos); + + + /** + * Serializes column group to data output. + * + * @param out + * @throws IOException + */ + public abstract void write(DataOutput out) + throws IOException; + + /** + * Deserializes column group from data input. + * + * @param in + * @throws IOException + */ + public abstract void readFields(DataInput in) + throws IOException; + + + /** + * Returns the exact serialized size of column group. + * This can be used for example for buffer preallocation. + * + * @return + */ + public abstract long getExactSizeOnDisk(); + + /** + * Multiply the slice of the matrix that this column group represents by a + * vector on the right. + * + * @param vector + * vector to multiply by (tall vector) + * @param result + * accumulator for holding the result + * @throws DMLRuntimeException + * if the internal SystemML code that performs the + * multiplication experiences an error + */ + public abstract void rightMultByVector(MatrixBlock vector, + MatrixBlock result, int rl, int ru) throws DMLRuntimeException; + + + /** + * Multiply the slice of the matrix that this column group represents by a + * row vector on the left (the original column vector is assumed to be + * transposed already i.e. its size now is 1xn). + * + * @param vector + * @param result + * @throws DMLRuntimeException + */ + public abstract void leftMultByRowVector(MatrixBlock vector, + MatrixBlock result) throws DMLRuntimeException; + + /** + * Perform the specified scalar operation directly on the compressed column + * group, without decompressing individual cells if possible. + * + * @param op + * operation to perform + * @return version of this column group with the operation applied + */ + public abstract ColGroup scalarOperation(ScalarOperator op) + throws DMLRuntimeException; + + /** + * + * @param op + * @param result + * @throws DMLUnsupportedOperationException + * @throws DMLRuntimeException + */ + public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) + throws DMLRuntimeException; + + /** + * + * @param rnnz + */ + protected abstract void countNonZerosPerRow(int[] rnnz); +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java new file mode 100644 index 0000000..5d60a49 --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java @@ -0,0 +1,498 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.operators.ScalarOperator; + + +/** + * Base class for column groups encoded with various types of bitmap encoding. + * + * + * NOTES: + * * OLE: separate storage segment length and bitmaps led to a 30% improvement + * but not applied because more difficult to support both data layouts at the + * same time (distributed/local as well as w/ and w/o low-level opt) + */ +public abstract class ColGroupBitmap extends ColGroup +{ + private static final long serialVersionUID = -1635828933479403125L; + + public static final boolean LOW_LEVEL_OPT = true; + //sorting of values by physical length helps by 10-20%, especially for serial, while + //slight performance decrease for parallel incl multi-threaded, hence not applied for + //distributed operations (also because compression time + garbage collection increases) + private static final boolean SORT_VALUES_BY_LENGTH = true; + protected static final boolean CREATE_SKIPLIST = true; + + protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ; + protected static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ; + + /** Distinct values associated with individual bitmaps. */ + protected double[] _values; //linearized <numcol vals> <numcol vals> + + /** Bitmaps, one per uncompressed value in {@link #values}. */ + protected int[] _ptr; //bitmap offsets per value + protected char[] _data; //linearized bitmaps (variable length) + + protected int[] _skiplist; + + public ColGroupBitmap(CompressionType type) { + super(type, (int[]) null, -1); + } + + /** + * Main constructor. Stores the headers for the individual bitmaps. + * + * @param colIndices + * indices (within the block) of the columns included in this + * column + * @param numRows + * total number of rows in the parent block + * @param ubm + * Uncompressed bitmap representation of the block + */ + public ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, UncompressedBitmap ubm) + { + super(type, colIndices, numRows); + + // Extract and store just the distinct values. The bitmaps themselves go + // into the subclasses. + final int numCols = ubm.getNumColumns(); + final int numVals = ubm.getNumValues(); + + _values = new double[numVals*numCols]; + for (int i=0; i<numVals; i++) { + //note: deep copied internally on getValues + double[] tmp = ubm.getValues(i); + System.arraycopy(tmp, 0, _values, i*numCols, numCols); + } + } + + /** + * Constructor for subclass methods that need to create shallow copies + * + * @param colIndices + * raw column index information + * @param numRows + * number of rows in the block + * @param values + * set of distinct values for the block (associated bitmaps are + * kept in the subclass) + */ + protected ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, double[] values) { + super(type, colIndices, numRows); + _values = values; + } + + protected final int len(int k) { + return _ptr[k+1] - _ptr[k]; + } + + /** + * + * @param numVals + * @param totalLen + * @param lbitmaps + */ + protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps) + { + // compact bitmaps to linearized representation + if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH + && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) + { + // sort value by num segments in descending order + TreeMap<Integer,ArrayList<Integer>> tree = new TreeMap<Integer, ArrayList<Integer>>(); + for( int i=0; i<numVals; i++ ) { + int revlen = totalLen-lbitmaps[i].length; + if( !tree.containsKey(revlen) ) + tree.put(revlen, new ArrayList<Integer>()); + tree.get(revlen).add(i); + } + + // compact bitmaps to linearized representation + _ptr = new int[numVals+1]; + _data = new char[totalLen]; + int pos = 0, off = 0; + for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) { + for( Integer tmpix : e.getValue() ) { + int len = lbitmaps[tmpix].length; + _ptr[pos] = off; + System.arraycopy(lbitmaps[tmpix], 0, _data, off, len); + off += len; + pos++; + } + } + _ptr[numVals] = totalLen; + + // reorder values + double[] lvalues = new double[_values.length]; + int off2 = 0; int numCols = _colIndexes.length; + for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) { + for( Integer tmpix : e.getValue() ) { + System.arraycopy(_values, tmpix*numCols, lvalues, off2, numCols); + off2 += numCols; + } + } + _values = lvalues; + } + else + { + // compact bitmaps to linearized representation + _ptr = new int[numVals+1]; + _data = new char[totalLen]; + for( int i=0, off=0; i<numVals; i++ ) { + int len = lbitmaps[i].length; + _ptr[i] = off; + System.arraycopy(lbitmaps[i], 0, _data, off, len); + off += len; + } + _ptr[numVals] = totalLen; + } + } + + @Override + public long estimateInMemorySize() { + long size = super.estimateInMemorySize(); + + // adding the size of values + size += 8; //array reference + if (_values != null) { + size += 32 + _values.length * 8; //values + } + + // adding bitmaps size + size += 16; //array references + if (_data != null) { + size += 32 + _ptr.length * 4; // offsets + size += 32 + _data.length * 2; // bitmaps + } + + return size; + } + + //generic decompression for OLE/RLE, to be overwritten for performance + @Override + public void decompressToBlock(MatrixBlock target) + { + final int numCols = getNumCols(); + final int numVals = getNumValues(); + int[] colIndices = getColIndices(); + + // Run through the bitmaps for this column group + for (int i = 0; i < numVals; i++) { + Iterator<Integer> decoder = getDecodeIterator(i); + int valOff = i*numCols; + + while (decoder.hasNext()) { + int row = decoder.next(); + for (int colIx = 0; colIx < numCols; colIx++) { + target.appendValue(row, colIndices[colIx], _values[valOff+colIx]); + } + } + } + } + + //generic decompression for OLE/RLE, to be overwritten for performance + @Override + public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) + { + final int numCols = getNumCols(); + final int numVals = getNumValues(); + + // Run through the bitmaps for this column group + for (int i = 0; i < numVals; i++) { + Iterator<Integer> decoder = getDecodeIterator(i); + int valOff = i*numCols; + + while (decoder.hasNext()) { + int row = decoder.next(); + for (int colIx = 0; colIx < numCols; colIx++) { + int origMatrixColIx = getColIndex(colIx); + int targetColIx = colIndexTargets[origMatrixColIx]; + target.quickSetValue(row, targetColIx, _values[valOff+colIx]); + } + } + } + } + + //generic decompression for OLE/RLE, to be overwritten for performance + @Override + public void decompressToBlock(MatrixBlock target, int colpos) + { + final int numCols = getNumCols(); + final int numVals = getNumValues(); + + // Run through the bitmaps for this column group + for (int i = 0; i < numVals; i++) { + Iterator<Integer> decoder = getDecodeIterator(i); + int valOff = i*numCols; + + while (decoder.hasNext()) { + int row = decoder.next(); + target.quickSetValue(row, 0, _values[valOff+colpos]); + } + } + } + + /** + * + * @param bitmapIx + * @return + */ + protected double sumValues(int bitmapIx) + { + final int numCols = getNumCols(); + final int valOff = bitmapIx * numCols; + + double val = 0.0; + for( int i = 0; i < numCols; i++ ) { + val += _values[valOff+i]; + } + + return val; + } + + protected double sumValues(int bitmapIx, double[] b) + { + final int numCols = getNumCols(); + final int valOff = bitmapIx * numCols; + + double val = 0; + for( int i = 0; i < numCols; i++ ) { + val += _values[valOff+i] * b[i]; + } + + return val; + } + + /** + * + * @param b + * @param c + */ + protected void sumAllValues(double[] b, double[] c) + { + final int numVals = getNumValues(); + final int numCols = getNumCols(); + + //vectMultiplyAdd over cols instead of dotProduct over vals because + //usually more values than columns + for( int i=0, off=0; i<numCols; i++, off+=numVals ) + LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals); + } + + /** + * Method for use by subclasses. Applies a scalar operation to the value + * metadata stored in the superclass. + * + * @param op + * scalar operation to perform + * @return transformed copy of value metadata for this column group + * @throws DMLRuntimeException + */ + protected double[] applyScalarOp(ScalarOperator op) + throws DMLRuntimeException + { + //scan over linearized values + double[] ret = new double[_values.length]; + for (int i = 0; i < _values.length; i++) { + ret[i] = op.executeScalar(_values[i]); + } + + return ret; + } + + /** + * + * @param op + * @param newVal + * @param numCols + * @return + * @throws DMLRuntimeException + */ + protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols) + throws DMLRuntimeException + { + //scan over linearized values + double[] ret = new double[_values.length + numCols]; + for( int i = 0; i < _values.length; i++ ) { + ret[i] = op.executeScalar(_values[i]); + } + + //add new value to the end + Arrays.fill(ret, _values.length, _values.length+numCols, newVal); + + return ret; + } + + /** + * @return the number of distinct sets of values associated with the bitmaps + * in this column group + */ + public int getNumValues() { + return _values.length / _colIndexes.length; + } + + /** + * + * @return + */ + public double[] getValues() { + return _values; + } + + /** + * + * @return + */ + public char[] getBitmaps() { + return _data; + } + + public int[] getBitmapOffsets() { + return _ptr; + } + + /** + * @param bmpIx + * index of a specific compressed bitmap (stored in subclass, + * index same as {@link #values}) + * @return an object for iterating over the row offsets in this bitmap. Only + * valid until the next call to this method. May be reused across + * calls. + */ + public abstract Iterator<Integer> getDecodeIterator(int bmpIx); + + /** + * Utility function of sparse-unsafe operations. + * + * @param ind + * @return + * @throws DMLRuntimeException + */ + protected int[] computeOffsets(boolean[] ind) + throws DMLRuntimeException + { + //determine number of offsets + int numOffsets = 0; + for( int i=0; i<ind.length; i++ ) + numOffsets += ind[i] ? 1 : 0; + + //create offset lists + int[] ret = new int[numOffsets]; + for( int i=0, pos=0; i<ind.length; i++ ) + if( ind[i] ) + ret[pos++] = i; + + return ret; + } + + @Override + public void readFields(DataInput in) + throws IOException + { + _numRows = in.readInt(); + int numCols = in.readInt(); + int numVals = in.readInt(); + + //read col indices + _colIndexes = new int[ numCols ]; + for( int i=0; i<numCols; i++ ) + _colIndexes[i] = in.readInt(); + + //read distinct values + _values = new double[numVals*numCols]; + for( int i=0; i<numVals*numCols; i++ ) + _values[i] = in.readDouble(); + + //read bitmaps + int totalLen = in.readInt(); + _ptr = new int[numVals+1]; + _data = new char[totalLen]; + for( int i=0, off=0; i<numVals; i++ ) { + int len = in.readInt(); + _ptr[i] = off; + for( int j=0; j<len; j++ ) + _data[off+j] = in.readChar(); + off += len; + } + _ptr[numVals] = totalLen; + } + + @Override + public void write(DataOutput out) + throws IOException + { + int numCols = getNumCols(); + int numVals = getNumValues(); + out.writeInt(_numRows); + out.writeInt(numCols); + out.writeInt(numVals); + + //write col indices + for( int i=0; i<_colIndexes.length; i++ ) + out.writeInt( _colIndexes[i] ); + + //write distinct values + for( int i=0; i<_values.length; i++ ) + out.writeDouble(_values[i]); + + //write bitmaps (lens and data, offset later recreated) + int totalLen = 0; + for( int i=0; i<numVals; i++ ) + totalLen += len(i); + out.writeInt(totalLen); + for( int i=0; i<numVals; i++ ) { + int len = len(i); + int off = _ptr[i]; + out.writeInt(len); + for( int j=0; j<len; j++ ) + out.writeChar(_data[off+j]); + } + } + + @Override + public long getExactSizeOnDisk() { + long ret = 12; //header + //col indices + ret += 4 * _colIndexes.length; + //distinct values (groups of values) + ret += 8 * _values.length; + //actual bitmaps + ret += 4; //total length + for( int i=0; i<getNumValues(); i++ ) + ret += 4 + 2 * len(i); + + return ret; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/16e7b1c8/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java new file mode 100644 index 0000000..a2b493a --- /dev/null +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupOLE.java @@ -0,0 +1,634 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.Arrays; +import java.util.Iterator; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.compress.utils.ConverterUtils; +import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils; +import org.apache.sysml.runtime.functionobjects.KahanFunction; +import org.apache.sysml.runtime.functionobjects.KahanPlus; +import org.apache.sysml.runtime.functionobjects.KahanPlusSq; +import org.apache.sysml.runtime.functionobjects.ReduceAll; +import org.apache.sysml.runtime.functionobjects.ReduceCol; +import org.apache.sysml.runtime.functionobjects.ReduceRow; +import org.apache.sysml.runtime.instructions.cp.KahanObject; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator; +import org.apache.sysml.runtime.matrix.operators.ScalarOperator; + +/** + * Class to encapsulate information about a column group that is encoded with + * simple lists of offsets for each set of distinct values. + * + */ +public class ColGroupOLE extends ColGroupBitmap +{ + private static final long serialVersionUID = -9157676271360528008L; + + public ColGroupOLE() { + super(CompressionType.OLE_BITMAP); + } + + /** + * Main constructor. Constructs and stores the necessary bitmaps. + * + * @param colIndices + * indices (within the block) of the columns included in this + * column + * @param numRows + * total number of rows in the parent block + * @param ubm + * Uncompressed bitmap representation of the block + */ + public ColGroupOLE(int[] colIndices, int numRows, UncompressedBitmap ubm) + { + super(CompressionType.OLE_BITMAP, colIndices, numRows, ubm); + + // compress the bitmaps + final int numVals = ubm.getNumValues(); + char[][] lbitmaps = new char[numVals][]; + int totalLen = 0; + for( int i=0; i<numVals; i++ ) { + lbitmaps[i] = BitmapEncoder.genOffsetBitmap(ubm.getOffsetsList(i)); + totalLen += lbitmaps[i].length; + } + + // compact bitmaps to linearized representation + createCompressedBitmaps(numVals, totalLen, lbitmaps); + + + if( LOW_LEVEL_OPT && CREATE_SKIPLIST + && numRows > 2*BitmapEncoder.BITMAP_BLOCK_SZ ) + { + int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; + _skiplist = new int[numVals]; + int rl = (getNumRows()/2/blksz)*blksz; + for (int k = 0; k < numVals; k++) { + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + int bufIx = 0; + for( int i=0; i<rl && bufIx<bitmapLen; i+=blksz ) { + bufIx += _data[bitmapOff+bufIx] + 1; + } + _skiplist[k] = bufIx; + } + } + } + + /** + * Constructor for internal use. + */ + public ColGroupOLE(int[] colIndices, int numRows, double[] values, char[] bitmaps, int[] bitmapOffs) { + super(CompressionType.OLE_BITMAP, colIndices, numRows, values); + _data = bitmaps; + _ptr = bitmapOffs; + } + + @Override + public Iterator<Integer> getDecodeIterator(int bmpIx) { + return new BitmapDecoderOLE(_data, _ptr[bmpIx], len(bmpIx)); + } + + @Override + public void decompressToBlock(MatrixBlock target) + { + if( LOW_LEVEL_OPT && getNumValues() > 1 ) + { + final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; + final int numCols = getNumCols(); + final int numVals = getNumValues(); + final int n = getNumRows(); + + //cache blocking config and position array + int[] apos = new int[numVals]; + + //cache conscious append via horizontal scans + for( int bi=0; bi<n; bi+=blksz ) { + for (int k = 0, off=0; k < numVals; k++, off+=numCols) { + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + int bufIx = apos[k]; + if( bufIx >= bitmapLen ) + continue; + int len = _data[bitmapOff+bufIx]; + int pos = bitmapOff+bufIx+1; + for( int i=pos; i<pos+len; i++ ) + for( int j=0, rix = bi+_data[i]; j<numCols; j++ ) + if( _values[off+j]!=0 ) + target.appendValue(rix, _colIndexes[j], _values[off+j]); + apos[k] += len + 1; + } + } + } + else + { + //call generic decompression with decoder + super.decompressToBlock(target); + } + } + + @Override + public void decompressToBlock(MatrixBlock target, int[] colixTargets) + { + if( LOW_LEVEL_OPT && getNumValues() > 1 ) + { + final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; + final int numCols = getNumCols(); + final int numVals = getNumValues(); + final int n = getNumRows(); + + //cache blocking config and position array + int[] apos = new int[numVals]; + int[] cix = new int[numCols]; + + //prepare target col indexes + for( int j=0; j<numCols; j++ ) + cix[j] = colixTargets[_colIndexes[j]]; + + //cache conscious append via horizontal scans + for( int bi=0; bi<n; bi+=blksz ) { + for (int k = 0, off=0; k < numVals; k++, off+=numCols) { + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + int bufIx = apos[k]; + if( bufIx >= bitmapLen ) + continue; + int len = _data[bitmapOff+bufIx]; + int pos = bitmapOff+bufIx+1; + for( int i=pos; i<pos+len; i++ ) + for( int j=0, rix = bi+_data[i]; j<numCols; j++ ) + if( _values[off+j]!=0 ) + target.appendValue(rix, cix[j], _values[off+j]); + apos[k] += len + 1; + } + } + } + else + { + //call generic decompression with decoder + super.decompressToBlock(target, colixTargets); + } + } + + @Override + public void decompressToBlock(MatrixBlock target, int colpos) + { + if( LOW_LEVEL_OPT && getNumValues() > 1 ) + { + final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; + final int numCols = getNumCols(); + final int numVals = getNumValues(); + final int n = getNumRows(); + double[] c = target.getDenseBlock(); + + //cache blocking config and position array + int[] apos = new int[numVals]; + + //cache conscious append via horizontal scans + for( int bi=0; bi<n; bi+=blksz ) { + for (int k = 0, off=0; k < numVals; k++, off+=numCols) { + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + int bufIx = apos[k]; + if( bufIx >= bitmapLen ) + continue; + int len = _data[bitmapOff+bufIx]; + int pos = bitmapOff+bufIx+1; + for( int i=pos; i<pos+len; i++ ) { + c[bi+_data[i]] = _values[off+colpos]; + } + apos[k] += len + 1; + } + } + + target.recomputeNonZeros(); + } + else + { + //call generic decompression with decoder + super.decompressToBlock(target, colpos); + } + } + + @Override + public ColGroup scalarOperation(ScalarOperator op) + throws DMLRuntimeException + { + double val0 = op.executeScalar(0); + + //fast path: sparse-safe operations + // Note that bitmaps don't change and are shallow-copied + if( op.sparseSafe || val0==0 ) { + return new ColGroupOLE(_colIndexes, _numRows, + applyScalarOp(op), _data, _ptr); + } + + //slow path: sparse-unsafe operations (potentially create new bitmap) + //note: for efficiency, we currently don't drop values that become 0 + boolean[] lind = computeZeroIndicatorVector(); + int[] loff = computeOffsets(lind); + if( loff.length==0 ) { //empty offset list: go back to fast path + return new ColGroupOLE(_colIndexes, _numRows, + applyScalarOp(op), _data, _ptr); + } + + double[] rvalues = applyScalarOp(op, val0, getNumCols()); + char[] lbitmap = BitmapEncoder.genOffsetBitmap(loff); + char[] rbitmaps = Arrays.copyOf(_data, _data.length+lbitmap.length); + System.arraycopy(lbitmap, 0, rbitmaps, _data.length, lbitmap.length); + int[] rbitmapOffs = Arrays.copyOf(_ptr, _ptr.length+1); + rbitmapOffs[rbitmapOffs.length-1] = rbitmaps.length; + + return new ColGroupOLE(_colIndexes, _numRows, + rvalues, rbitmaps, rbitmapOffs); + } + + @Override + public void rightMultByVector(MatrixBlock vector, MatrixBlock result, int rl, int ru) + throws DMLRuntimeException + { + double[] b = ConverterUtils.getDenseVector(vector); + double[] c = result.getDenseBlock(); + final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; + final int numCols = getNumCols(); + final int numVals = getNumValues(); + + //prepare reduced rhs w/ relevant values + double[] sb = new double[numCols]; + for (int j = 0; j < numCols; j++) { + sb[j] = b[_colIndexes[j]]; + } + + if( LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz ) + { + //since single segment scans already exceed typical L2 cache sizes + //and because there is some overhead associated with blocking, the + //best configuration aligns with L3 cache size (x*vcores*64K*8B < L3) + //x=4 leads to a good yet slightly conservative compromise for single-/ + //multi-threaded and typical number of cores and L3 cache sizes + final int blksz2 = ColGroupBitmap.WRITE_CACHE_BLKSZ; + + //step 1: prepare position and value arrays + + //current pos / values per OLs + int[] apos = new int[numVals]; + double[] aval = new double[numVals]; + + //skip-scan to beginning for all OLs + if( rl > 0 ) { //rl aligned with blksz + int rskip = (getNumRows()/2/blksz)*blksz; + + for (int k = 0; k < numVals; k++) { + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + int start = (rl>=rskip)?rskip:0; + int bufIx = (rl>=rskip)?_skiplist[k]:0; + for( int i=start; i<rl && bufIx<bitmapLen; i+=blksz ) { + bufIx += _data[bitmapOff+bufIx] + 1; + } + apos[k] = bufIx; + } + } + + //pre-aggregate values per OLs + for( int k = 0; k < numVals; k++ ) + aval[k] = sumValues(k, sb); + + //step 2: cache conscious matrix-vector via horizontal scans + for( int bi=rl; bi<ru; bi+=blksz2 ) + { + int bimax = Math.min(bi+blksz2, ru); + + //horizontal segment scan, incl pos maintenance + for (int k = 0; k < numVals; k++) { + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + double val = aval[k]; + int bufIx = apos[k]; + + for( int ii=bi; ii<bimax && bufIx<bitmapLen; ii+=blksz ) { + //prepare length, start, and end pos + int len = _data[bitmapOff+bufIx]; + int pos = bitmapOff+bufIx+1; + + //compute partial results + //LinearAlgebraUtils.vectAdd(val, c, bitmaps, pos, ii, len); + for( int i=pos; i<pos+len; i++ ) + c[ii + _data[i]] += val; + bufIx += len + 1; + } + + apos[k] = bufIx; + } + } + } + else + { + //iterate over all values and their bitmaps + for (int k = 0; k < numVals; k++) + { + //prepare value-to-add for entire value bitmap + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + double val = sumValues(k, sb); + + //iterate over bitmap blocks and add values + if (val != 0) { + int offsetBase = 0; + int bufIx = 0; + int curBlckLen; + + //scan to beginning offset if necessary + if( rl > 0 ){ + for (; bufIx<bitmapLen & offsetBase<rl; bufIx += curBlckLen + 1, offsetBase += blksz) { + curBlckLen = _data[bitmapOff+bufIx]; + } + } + + //compute partial results + for (; bufIx<bitmapLen & offsetBase<ru; bufIx += curBlckLen + 1, offsetBase += blksz) { + curBlckLen = _data[bitmapOff+bufIx]; + for (int blckIx = 1; blckIx <= curBlckLen; blckIx++) { + c[offsetBase + _data[bitmapOff+bufIx + blckIx]] += val; + } + } + } + } + } + } + + @Override + public void leftMultByRowVector(MatrixBlock vector, MatrixBlock result) + throws DMLRuntimeException + { + double[] a = ConverterUtils.getDenseVector(vector); + double[] c = result.getDenseBlock(); + final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; + final int numCols = getNumCols(); + final int numVals = getNumValues(); + final int n = getNumRows(); + + if( LOW_LEVEL_OPT && numVals > 1 && _numRows > blksz ) + { + //cache blocking config (see matrix-vector mult for explanation) + final int blksz2 = ColGroupBitmap.READ_CACHE_BLKSZ; + + //step 1: prepare position and value arrays + + //current pos per OLs / output values + int[] apos = new int[numVals]; + double[] cvals = new double[numVals]; + + //step 2: cache conscious matrix-vector via horizontal scans + for( int ai=0; ai<n; ai+=blksz2 ) + { + int aimax = Math.min(ai+blksz2, n); + + //horizontal segment scan, incl pos maintenance + for (int k = 0; k < numVals; k++) { + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + int bufIx = apos[k]; + double vsum = 0; + + for( int ii=ai; ii<aimax && bufIx<bitmapLen; ii+=blksz ) { + //prepare length, start, and end pos + int len = _data[bitmapOff+bufIx]; + int pos = bitmapOff+bufIx+1; + + //iterate over bitmap blocks and compute partial results (a[i]*1) + vsum += LinearAlgebraUtils.vectSum(a, _data, ii, pos, len); + bufIx += len + 1; + } + + apos[k] = bufIx; + cvals[k] += vsum; + } + } + + //step 3: scale partial results by values and write to global output + for (int k = 0, valOff=0; k < numVals; k++, valOff+=numCols) + for( int j = 0; j < numCols; j++ ) + c[ _colIndexes[j] ] += cvals[k] * _values[valOff+j]; + } + else + { + //iterate over all values and their bitmaps + for (int k=0, valOff=0; k<numVals; k++, valOff+=numCols) + { + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + + //iterate over bitmap blocks and add partial results + double vsum = 0; + for (int bix=0, off=0; bix < bitmapLen; bix+=_data[bitmapOff+bix]+1, off+=blksz) + vsum += LinearAlgebraUtils.vectSum(a, _data, off, bitmapOff+bix+1, _data[bitmapOff+bix]); + + //scale partial results by values and write results + for( int j = 0; j < numCols; j++ ) + c[ _colIndexes[j] ] += vsum * _values[ valOff+j ]; + } + } + } + + @Override + public void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) + throws DMLRuntimeException + { + KahanFunction kplus = (op.aggOp.increOp.fn instanceof KahanPlus) ? + KahanPlus.getKahanPlusFnObject() : KahanPlusSq.getKahanPlusSqFnObject(); + + if( op.indexFn instanceof ReduceAll ) + computeSum(result, kplus); + else if( op.indexFn instanceof ReduceCol ) + computeRowSums(result, kplus); + else if( op.indexFn instanceof ReduceRow ) + computeColSums(result, kplus); + } + + /** + * + * @param result + */ + private void computeSum(MatrixBlock result, KahanFunction kplus) + { + KahanObject kbuff = new KahanObject(result.quickGetValue(0, 0), result.quickGetValue(0, 1)); + + //iterate over all values and their bitmaps + final int numVals = getNumValues(); + final int numCols = getNumCols(); + + for (int bitmapIx = 0; bitmapIx < numVals; bitmapIx++) + { + int bitmapOff = _ptr[bitmapIx]; + int bitmapLen = len(bitmapIx); + int valOff = bitmapIx * numCols; + + //iterate over bitmap blocks and count partial lengths + int count = 0; + for (int bix=0; bix < bitmapLen; bix+=_data[bitmapOff+bix]+1) + count += _data[bitmapOff+bix]; + + //scale counts by all values + for( int j = 0; j < numCols; j++ ) + kplus.execute3(kbuff, _values[ valOff+j ], count); + } + + result.quickSetValue(0, 0, kbuff._sum); + result.quickSetValue(0, 1, kbuff._correction); + } + + /** + * + * @param result + */ + private void computeRowSums(MatrixBlock result, KahanFunction kplus) + { + KahanObject kbuff = new KahanObject(0, 0); + + final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; + final int numVals = getNumValues(); + + //iterate over all values and their bitmaps + for (int bitmapIx = 0; bitmapIx < numVals; bitmapIx++) + { + //prepare value-to-add for entire value bitmap + int bitmapOff = _ptr[bitmapIx]; + int bitmapLen = len(bitmapIx); + double val = sumValues(bitmapIx); + + //iterate over bitmap blocks and add values + if (val != 0) { + int offsetBase = 0; + int bufIx = 0; + int curBlckLen; + for (; bufIx < bitmapLen; bufIx += curBlckLen + 1, offsetBase += blksz) { + curBlckLen = _data[bitmapOff+bufIx]; + for (int blckIx = 1; blckIx <= curBlckLen; blckIx++) { + int rix = offsetBase + _data[bitmapOff+bufIx + blckIx]; + kbuff.set(result.quickGetValue(rix, 0), result.quickGetValue(rix, 1)); + kplus.execute2(kbuff, val); + result.quickSetValue(rix, 0, kbuff._sum); + result.quickSetValue(rix, 1, kbuff._correction); + } + } + } + } + } + + /** + * + * @param result + */ + private void computeColSums(MatrixBlock result, KahanFunction kplus) + { + KahanObject kbuff = new KahanObject(0, 0); + + //iterate over all values and their bitmaps + final int numVals = getNumValues(); + final int numCols = getNumCols(); + for (int bitmapIx = 0; bitmapIx < numVals; bitmapIx++) + { + int bitmapOff = _ptr[bitmapIx]; + int bitmapLen = len(bitmapIx); + int valOff = bitmapIx * numCols; + + //iterate over bitmap blocks and count partial lengths + int count = 0; + for (int bix=0; bix < bitmapLen; bix+=_data[bitmapOff+bix]+1) + count += _data[bitmapOff+bix]; + + //scale counts by all values + for( int j = 0; j < numCols; j++ ) { + kbuff.set(result.quickGetValue(0, _colIndexes[j]),result.quickGetValue(1, _colIndexes[j])); + kplus.execute3(kbuff, _values[ valOff+j ], count); + result.quickSetValue(0, _colIndexes[j], kbuff._sum); + result.quickSetValue(1, _colIndexes[j], kbuff._correction); + } + } + } + + /** + * Utility function of sparse-unsafe operations. + * + * @return + * @throws DMLRuntimeException + */ + private boolean[] computeZeroIndicatorVector() + throws DMLRuntimeException + { + boolean[] ret = new boolean[_numRows]; + final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; + final int numVals = getNumValues(); + + //initialize everything with zero + Arrays.fill(ret, true); + + //iterate over all values and their bitmaps + for (int bitmapIx = 0; bitmapIx < numVals; bitmapIx++) + { + //prepare value-to-add for entire value bitmap + int bitmapOff = _ptr[bitmapIx]; + int bitmapLen = len(bitmapIx); + + //iterate over bitmap blocks and add values + int offsetBase = 0; + int bufIx = 0; + int curBlckLen; + for (; bufIx < bitmapLen; bufIx += curBlckLen + 1, offsetBase += blksz) { + curBlckLen = _data[bitmapOff+bufIx]; + for (int blckIx = 1; blckIx <= curBlckLen; blckIx++) { + ret[offsetBase + _data[bitmapOff+bufIx + blckIx]] &= false; + } + } + } + + return ret; + } + + @Override + protected void countNonZerosPerRow(int[] rnnz) + { + final int blksz = BitmapEncoder.BITMAP_BLOCK_SZ; + final int numVals = getNumValues(); + final int numCols = getNumCols(); + + //iterate over all values and their bitmaps + for (int k = 0; k < numVals; k++) + { + //prepare value-to-add for entire value bitmap + int bitmapOff = _ptr[k]; + int bitmapLen = len(k); + + //iterate over bitmap blocks and add values + int offsetBase = 0; + int curBlckLen; + for (int bufIx=0; bufIx<bitmapLen; bufIx+=curBlckLen+1, offsetBase+=blksz) { + curBlckLen = _data[bitmapOff+bufIx]; + for (int blckIx = 1; blckIx <= curBlckLen; blckIx++) { + rnnz[offsetBase + _data[bitmapOff+bufIx + blckIx]] += numCols; + } + } + } + } +}
