[SYSTEMML-766][SYSTEMML-810] Fix missing licenses / unix line delimiters Project: http://git-wip-us.apache.org/repos/asf/incubator-systemml/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-systemml/commit/da318739 Tree: http://git-wip-us.apache.org/repos/asf/incubator-systemml/tree/da318739 Diff: http://git-wip-us.apache.org/repos/asf/incubator-systemml/diff/da318739
Branch: refs/heads/master Commit: da3187392cb67e8a5bd3a494fae68b8dfbc0d883 Parents: 2b7fdb2 Author: Matthias Boehm <[email protected]> Authored: Sun Jul 17 14:31:27 2016 -0700 Committer: Matthias Boehm <[email protected]> Committed: Sun Jul 17 14:31:27 2016 -0700 ---------------------------------------------------------------------- .../runtime/compress/BitmapDecoderOLE.java | 254 +- .../runtime/compress/BitmapDecoderRLE.java | 234 +- .../sysml/runtime/compress/BitmapEncoder.java | 784 ++--- .../apache/sysml/runtime/compress/ColGroup.java | 518 ++-- .../sysml/runtime/compress/ColGroupBitmap.java | 996 +++---- .../sysml/runtime/compress/ColGroupOLE.java | 1268 ++++----- .../sysml/runtime/compress/ColGroupRLE.java | 1234 ++++---- .../runtime/compress/ColGroupUncompressed.java | 720 ++--- .../runtime/compress/CompressedMatrixBlock.java | 2684 +++++++++--------- .../runtime/compress/PlanningBinPacker.java | 382 +-- .../runtime/compress/PlanningCoCodingGroup.java | 206 +- .../compress/PlanningGroupMergeAction.java | 146 +- .../runtime/compress/ReaderColumnSelection.java | 128 +- .../compress/ReaderColumnSelectionDense.java | 136 +- .../ReaderColumnSelectionDenseSample.java | 172 +- .../compress/ReaderColumnSelectionSparse.java | 230 +- .../runtime/compress/UncompressedBitmap.java | 202 +- .../compress/estim/CompressedSizeEstimator.java | 290 +- .../estim/CompressedSizeEstimatorExact.java | 106 +- .../estim/CompressedSizeEstimatorSample.java | 1534 +++++----- .../compress/estim/CompressedSizeInfo.java | 138 +- .../sysml/runtime/compress/utils/DblArray.java | 182 +- .../compress/utils/DblArrayIntListHashMap.java | 358 +-- .../compress/utils/DoubleIntListHashMap.java | 362 +-- .../compress/utils/LinearAlgebraUtils.java | 766 ++--- .../instructions/cp/PlusMultCPInstruction.java | 19 + src/test/scripts/functions/compress/LinregCG.R | 72 +- .../scripts/functions/compress/LinregCG.dml | 70 +- 28 files changed, 7105 insertions(+), 7086 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java index fc6e861..539d1f5 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java +++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderOLE.java @@ -1,127 +1,127 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import java.util.Arrays; -import java.util.Iterator; - -/** - * General-purpose iterator to decode a compressed OLE bitmap. - * - */ -public final class BitmapDecoderOLE implements Iterator<Integer> -{ - // pointer to the compressed bitmap - private int _bmOff; - private int _bmLen; - private char[] _bmPtr; - - // The index of the current block. Block 0 covers bits 1 through 2^16 - private int _blockIx; - - // The offset where the current block starts within the bitmap. - private int _blockStartOffset; - - // The number of offsets in the current block. - private int _curBlockSize; - - // The offset <b>in the current block</b> the <b>next</b> element we will - // read from the bitmap, or bmPtr.length if we are done. - private int _nextBmOffset; - - /** - * Point this object at the beginning of a particular bitmap. After a call - * to this method, the next call to {@link #nextOffset()} will return the - * offset of the first bit in the specified bitmap. - * - * @param bmPtr - * pointer to a compressed bitmap - */ - public BitmapDecoderOLE(char[] bmPtr, int off, int len) { - _bmOff = off; - _bmLen = len; - _bmPtr = bmPtr; - _blockIx = 0; - _blockStartOffset = 0; - _curBlockSize = _bmPtr[_bmOff+_blockStartOffset]; - if (_curBlockSize < 0) { - throw new RuntimeException(String.format( - "Negative block size %d at position %d of %s", - _curBlockSize, _blockStartOffset, Arrays.toString(bmPtr))); - } - _nextBmOffset = 0; - - // Advance past any zero-length blocks at the beginning of the array - while (_blockStartOffset < _bmLen - && _nextBmOffset >= _curBlockSize) { - advanceToNextBlock(); - } - } - - @Override - public Integer next() { - if( !hasNext() ) - throw new RuntimeException("No next offset existing."); - - // Grab the lookahead value Note the +1 in the array indexing; - // the first number in a block is the block size - int offsetFromBlockBegin = _bmPtr[_bmOff+_blockStartOffset + 1 + _nextBmOffset]; - int ret = (_blockIx * BitmapEncoder.BITMAP_BLOCK_SZ) - + offsetFromBlockBegin; - _nextBmOffset++; - - // Advance to next non-empty block if we reached the end of the block. - while (_blockStartOffset < _bmLen && _nextBmOffset >= _curBlockSize) { - advanceToNextBlock(); - } - - return ret; - } - - @Override - public boolean hasNext() { - return _blockStartOffset < _bmLen; - } - - @Override - public void remove() { - throw new RuntimeException("Not implemented for BitmapDecoderOLE."); - } - - /** - * Move forward to the next block. Does not skip empty blocks. - */ - private void advanceToNextBlock() { - _blockStartOffset += (1 + _curBlockSize); - _blockIx++; - if (_blockStartOffset >= _bmLen) { - // Read past last block - return; - } - - _curBlockSize = _bmPtr[_bmOff+_blockStartOffset]; - if (_curBlockSize < 0) { - throw new RuntimeException(String.format( - "Negative block size %d at position %d of %s", - _curBlockSize, _blockStartOffset, Arrays.toString(_bmPtr))); - } - _nextBmOffset = 0; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.Arrays; +import java.util.Iterator; + +/** + * General-purpose iterator to decode a compressed OLE bitmap. + * + */ +public final class BitmapDecoderOLE implements Iterator<Integer> +{ + // pointer to the compressed bitmap + private int _bmOff; + private int _bmLen; + private char[] _bmPtr; + + // The index of the current block. Block 0 covers bits 1 through 2^16 + private int _blockIx; + + // The offset where the current block starts within the bitmap. + private int _blockStartOffset; + + // The number of offsets in the current block. + private int _curBlockSize; + + // The offset <b>in the current block</b> the <b>next</b> element we will + // read from the bitmap, or bmPtr.length if we are done. + private int _nextBmOffset; + + /** + * Point this object at the beginning of a particular bitmap. After a call + * to this method, the next call to {@link #nextOffset()} will return the + * offset of the first bit in the specified bitmap. + * + * @param bmPtr + * pointer to a compressed bitmap + */ + public BitmapDecoderOLE(char[] bmPtr, int off, int len) { + _bmOff = off; + _bmLen = len; + _bmPtr = bmPtr; + _blockIx = 0; + _blockStartOffset = 0; + _curBlockSize = _bmPtr[_bmOff+_blockStartOffset]; + if (_curBlockSize < 0) { + throw new RuntimeException(String.format( + "Negative block size %d at position %d of %s", + _curBlockSize, _blockStartOffset, Arrays.toString(bmPtr))); + } + _nextBmOffset = 0; + + // Advance past any zero-length blocks at the beginning of the array + while (_blockStartOffset < _bmLen + && _nextBmOffset >= _curBlockSize) { + advanceToNextBlock(); + } + } + + @Override + public Integer next() { + if( !hasNext() ) + throw new RuntimeException("No next offset existing."); + + // Grab the lookahead value Note the +1 in the array indexing; + // the first number in a block is the block size + int offsetFromBlockBegin = _bmPtr[_bmOff+_blockStartOffset + 1 + _nextBmOffset]; + int ret = (_blockIx * BitmapEncoder.BITMAP_BLOCK_SZ) + + offsetFromBlockBegin; + _nextBmOffset++; + + // Advance to next non-empty block if we reached the end of the block. + while (_blockStartOffset < _bmLen && _nextBmOffset >= _curBlockSize) { + advanceToNextBlock(); + } + + return ret; + } + + @Override + public boolean hasNext() { + return _blockStartOffset < _bmLen; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented for BitmapDecoderOLE."); + } + + /** + * Move forward to the next block. Does not skip empty blocks. + */ + private void advanceToNextBlock() { + _blockStartOffset += (1 + _curBlockSize); + _blockIx++; + if (_blockStartOffset >= _bmLen) { + // Read past last block + return; + } + + _curBlockSize = _bmPtr[_bmOff+_blockStartOffset]; + if (_curBlockSize < 0) { + throw new RuntimeException(String.format( + "Negative block size %d at position %d of %s", + _curBlockSize, _blockStartOffset, Arrays.toString(_bmPtr))); + } + _nextBmOffset = 0; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java index 54f24ae..f2e05a4 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java +++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapDecoderRLE.java @@ -1,117 +1,117 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import java.util.Iterator; - -/** - * General-purpose iterator to decode a compressed OLE bitmap. - * - */ -public final class BitmapDecoderRLE implements Iterator<Integer> -{ - // pointer to the compressed bitmap - private int _bmOff; - private int _bmLen; - private char[] _bmPtr; - - // The offset of the <b>next</b> element we will read from the bitmap, or - // bmPtr.length if we are done. - private int _nextBmOffset; - - // The offset in the matrix column of the beginning of the current run - private int _runStartOffset; - - // The length of the current run - private int _curRunLen; - - // The number of bits that we have returned from the current run. - private int _runBitsReturned; - - /** - * Point this object at the beginning of a particular bitmap. After a call - * to this method, the next call to {@link #nextOffset()} will return the - * offset of the first bit in the specified bitmap. - * - * @param bmPtr - * pointer to a compressed bitmap - */ - public BitmapDecoderRLE(char[] bmPtr, int off, int len) { - _bmOff = off; - _bmLen = len; - _bmPtr = bmPtr; - _nextBmOffset = 0; - _runStartOffset = 0; - _curRunLen = 0; - _runBitsReturned = 0; - - if (0 == _bmLen) { - return; //no runs - } - - // Advance to the beginning of the first non-empty run. - advanceToNextRun(); - } - - @Override - public Integer next() { - if( !hasNext() ) - throw new RuntimeException("No next offset existing."); - - // Grab the lookahead value - int ret = _runStartOffset + _runBitsReturned; - - _runBitsReturned++; - - // Check for end of run - if (_runBitsReturned == _curRunLen) { - advanceToNextRun(); - } - - return ret; - } - - @Override - public boolean hasNext() { - return _runBitsReturned < _curRunLen; - } - - @Override - public void remove() { - throw new RuntimeException("Not implemented for BitmapDecoderRLE."); - } - - /** Move forward to the next non-empty run. */ - private void advanceToNextRun() { - // While loop needed because some runs are of length 0 - while (_runBitsReturned == _curRunLen && _nextBmOffset < _bmLen) { - - _runBitsReturned = 0; - - // Read the distance to the next run - char delta = _bmPtr[_bmOff + _nextBmOffset]; - - // Run length is stored in the next element of the array - _runStartOffset += delta + _curRunLen; - _curRunLen = _bmPtr[_bmOff + _nextBmOffset + 1]; - _nextBmOffset += 2; - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.Iterator; + +/** + * General-purpose iterator to decode a compressed OLE bitmap. + * + */ +public final class BitmapDecoderRLE implements Iterator<Integer> +{ + // pointer to the compressed bitmap + private int _bmOff; + private int _bmLen; + private char[] _bmPtr; + + // The offset of the <b>next</b> element we will read from the bitmap, or + // bmPtr.length if we are done. + private int _nextBmOffset; + + // The offset in the matrix column of the beginning of the current run + private int _runStartOffset; + + // The length of the current run + private int _curRunLen; + + // The number of bits that we have returned from the current run. + private int _runBitsReturned; + + /** + * Point this object at the beginning of a particular bitmap. After a call + * to this method, the next call to {@link #nextOffset()} will return the + * offset of the first bit in the specified bitmap. + * + * @param bmPtr + * pointer to a compressed bitmap + */ + public BitmapDecoderRLE(char[] bmPtr, int off, int len) { + _bmOff = off; + _bmLen = len; + _bmPtr = bmPtr; + _nextBmOffset = 0; + _runStartOffset = 0; + _curRunLen = 0; + _runBitsReturned = 0; + + if (0 == _bmLen) { + return; //no runs + } + + // Advance to the beginning of the first non-empty run. + advanceToNextRun(); + } + + @Override + public Integer next() { + if( !hasNext() ) + throw new RuntimeException("No next offset existing."); + + // Grab the lookahead value + int ret = _runStartOffset + _runBitsReturned; + + _runBitsReturned++; + + // Check for end of run + if (_runBitsReturned == _curRunLen) { + advanceToNextRun(); + } + + return ret; + } + + @Override + public boolean hasNext() { + return _runBitsReturned < _curRunLen; + } + + @Override + public void remove() { + throw new RuntimeException("Not implemented for BitmapDecoderRLE."); + } + + /** Move forward to the next non-empty run. */ + private void advanceToNextRun() { + // While loop needed because some runs are of length 0 + while (_runBitsReturned == _curRunLen && _nextBmOffset < _bmLen) { + + _runBitsReturned = 0; + + // Read the distance to the next run + char delta = _bmPtr[_bmOff + _nextBmOffset]; + + // Run length is stored in the next element of the array + _runStartOffset += delta + _curRunLen; + _curRunLen = _bmPtr[_bmOff + _nextBmOffset + 1]; + _nextBmOffset += 2; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java index 8a535e1..75ddf9a 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java +++ b/src/main/java/org/apache/sysml/runtime/compress/BitmapEncoder.java @@ -1,392 +1,392 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import java.util.ArrayList; -import java.util.Arrays; - -import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.compress.utils.DblArray; -import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap; -import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap; -import org.apache.sysml.runtime.compress.utils.IntArrayList; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; -import org.apache.sysml.runtime.matrix.data.SparseBlock; - - -/** - * Static functions for encoding bitmaps in various ways. - * - */ -public class BitmapEncoder -{ - /** Size of the blocks used in a blocked bitmap representation. */ - public static final int BITMAP_BLOCK_SZ = 65536; - - /** - * Generate uncompressed bitmaps for a set of columns in an uncompressed - * matrix block. - * - * @param colIndices - * indexes (within the block) of the columns to extract - * @param rawblock - * an uncompressed matrix block; can be dense or sparse - * @return uncompressed bitmap representation of the columns - * @throws DMLRuntimeException - */ - public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock) - { - //note: no sparse column selection reader because low potential - //single column selection - if( colIndices.length==1 ) { - return extractBitmap(colIndices[0], rawblock, - !CompressedMatrixBlock.MATERIALIZE_ZEROS); - } - //multiple column selection (general case) - else { - ReaderColumnSelection reader = null; - if( rawblock.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT ) - reader = new ReaderColumnSelectionSparse(rawblock, colIndices, - !CompressedMatrixBlock.MATERIALIZE_ZEROS); - else - reader = new ReaderColumnSelectionDense(rawblock, colIndices, - !CompressedMatrixBlock.MATERIALIZE_ZEROS); - - return extractBitmap(colIndices, rawblock, reader); - } - } - - /** - * - * @param colIndices - * @param rawblock - * @param sampleIndexes - * @return - */ - public static UncompressedBitmap extractBitmapFromSample(int[] colIndices, - MatrixBlock rawblock, int[] sampleIndexes) - { - //note: no sparse column selection reader because low potential - - //single column selection - if( colIndices.length==1 ) { - return extractBitmap(colIndices[0], rawblock, sampleIndexes, - !CompressedMatrixBlock.MATERIALIZE_ZEROS); - } - //multiple column selection (general case) - else { - return extractBitmap(colIndices, rawblock, - new ReaderColumnSelectionDenseSample(rawblock, colIndices, - sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS)); - } - } - - /** - * Encodes the bitmap as a series of run lengths and offsets. - * <p> - * <b>NOTE: This method must be kept in sync with {@link BitmapDecoderRLE} - * !</b> - * - * @param offsets - * uncompressed contents of the bitmap, expressed as a list of - * the offsets of different bits - * @return compressed version of said bitmap - */ - public static char[] genRLEBitmap(int[] offsets) { - if( offsets.length == 0 ) - return new char[0]; //empty list - - // Use an ArrayList for correctness at the expense of temp space - ArrayList<Character> buf = new ArrayList<Character>(); - - // 1 + (position of last 1 in the previous run of 1's) - // We add 1 because runs may be of length zero. - int lastRunEnd = 0; - - // Offset between the end of the previous run of 1's and the first 1 in - // the current run. Initialized below. - int curRunOff; - - // Length of the most recent run of 1's - int curRunLen = 0; - - // Current encoding is as follows: - // Negative entry: abs(Entry) encodes the offset to the next lone 1 bit. - // Positive entry: Entry encodes offset to next run of 1's. The next - // entry in the bitmap holds a run length. - - // Special-case the first run to simplify the loop below. - int firstOff = offsets[0]; - - // The first run may start more than a short's worth of bits in - while (firstOff > Character.MAX_VALUE) { - buf.add(Character.MAX_VALUE); - buf.add((char) 0); - firstOff -= Character.MAX_VALUE; - lastRunEnd += Character.MAX_VALUE; - } - - // Create the first run with an initial size of 1 - curRunOff = firstOff; - curRunLen = 1; - - // Process the remaining offsets - for (int i = 1; i < offsets.length; i++) { - - int absOffset = offsets[i]; - - // 1 + (last position in run) - int curRunEnd = lastRunEnd + curRunOff + curRunLen; - - if (absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) { - // End of a run, either because we hit a run of 0's or because the - // number of 1's won't fit in 16 bits. Add run to bitmap and start a new one. - buf.add((char) curRunOff); - buf.add((char) curRunLen); - - lastRunEnd = curRunEnd; - curRunOff = absOffset - lastRunEnd; - - while (curRunOff > Character.MAX_VALUE) { - // SPECIAL CASE: Offset to next run doesn't fit into 16 bits. - // Add zero-length runs until the offset is small enough. - buf.add(Character.MAX_VALUE); - buf.add((char) 0); - lastRunEnd += Character.MAX_VALUE; - curRunOff -= Character.MAX_VALUE; - } - - curRunLen = 1; - } else { - // Middle of a run - curRunLen++; - } - } - - // Close out the last run - if (curRunLen >= 1) { - buf.add((char) curRunOff); - buf.add((char) curRunLen); - } - - // Convert wasteful ArrayList to packed array. - char[] ret = new char[buf.size()]; - for (int i = 0; i < buf.size(); i++) { - ret[i] = buf.get(i); - } - return ret; - } - - /** - * Encodes the bitmap in blocks of offsets. Within each block, the bits are - * stored as absolute offsets from the start of the block. - * - * @param offsets - * uncompressed contents of the bitmap, expressed as a list of - * the offsets of different bits - * @return compressed version of said bitmap - */ - public static char[] genOffsetBitmap(int[] offsets) - { - int lastOffset = offsets[offsets.length - 1]; - - // Build up the blocks - int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1; - // To simplify the logic, we make two passes. - // The first pass divides the offsets by block. - int[] blockLengths = new int[numBlocks]; - Arrays.fill(blockLengths, 0); - - for (int ix = 0; ix < offsets.length; ix++) { - int val = offsets[ix]; - int blockForVal = val / BITMAP_BLOCK_SZ; - - blockLengths[blockForVal]++; - } - - // The second pass creates the blocks. - int totalSize = numBlocks; - for (int block = 0; block < numBlocks; block++) { - totalSize += blockLengths[block]; - } - char[] encodedBlocks = new char[totalSize]; - - int inputIx = 0; - int blockStartIx = 0; - for (int block = 0; block < numBlocks; block++) { - int blockSz = blockLengths[block]; - - // First entry in the block is number of bits - encodedBlocks[blockStartIx] = (char) blockSz; - - for (int i = 0; i < blockSz; i++) { - encodedBlocks[blockStartIx + i + 1] = (char) - (offsets[inputIx+i] % BITMAP_BLOCK_SZ); - } - - inputIx += blockSz; - blockStartIx += blockSz + 1; - } - - return encodedBlocks; - } - - /** - * - * @param colIndex - * @param rawblock - * @return - * @throws DMLRuntimeException - */ - private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros) - { - //probe map for distinct items (for value or value groups) - DoubleIntListHashMap distinctVals = new DoubleIntListHashMap(); - - //scan rows and probe/build distinct items - final int m = CompressedMatrixBlock.TRANSPOSE_INPUT ? - rawblock.getNumColumns():rawblock.getNumRows(); - - if( rawblock.isInSparseFormat() //SPARSE - && CompressedMatrixBlock.TRANSPOSE_INPUT ) - { - SparseBlock a = rawblock.getSparseBlock(); - if( a != null && !a.isEmpty(colIndex) ) - { - int apos = a.pos(colIndex); - int alen = a.size(colIndex); - int[] aix = a.indexes(colIndex); - double[] avals = a.values(colIndex); - - IntArrayList lstPtr0 = new IntArrayList(); //for 0 values - int last = -1; - //iterate over non-zero entries but fill in zeros - for( int j=apos; j<apos+alen; j++ ) - { - //fill in zero values - if( !skipZeros ) - for( int k=last+1; k<aix[j]; k++ ) - lstPtr0.appendValue(k); - //handle non-zero value - IntArrayList lstPtr = distinctVals.get(avals[j]); - if( lstPtr == null ) { - lstPtr = new IntArrayList(); - distinctVals.appendValue(avals[j], lstPtr); - } - lstPtr.appendValue(aix[j]); - last = aix[j]; - } - //fill in remaining zero values - if( !skipZeros ) { - for( int k=last+1; k<m; k++ ) - lstPtr0.appendValue(k); - if( lstPtr0.size()>0 ) - distinctVals.appendValue(0, lstPtr0); - } - } - else if( !skipZeros ) { //full 0 column - IntArrayList lstPtr = new IntArrayList(); - for( int i=0; i<m; i++ ) - lstPtr.appendValue(i); - distinctVals.appendValue(0, lstPtr); - } - } - else //GENERAL CASE - { - for( int i=0; i<m; i++ ) { - double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? - rawblock.quickGetValue(colIndex, i): - rawblock.quickGetValue(i, colIndex); - if( val!=0 || !skipZeros ) { - IntArrayList lstPtr = distinctVals.get(val); - if( lstPtr == null ) { - lstPtr = new IntArrayList(); - distinctVals.appendValue(val, lstPtr); - } - lstPtr.appendValue(i); - } - } - } - - return new UncompressedBitmap(distinctVals); - } - - /** - * - * @param colIndex - * @param rawblock - * @param sampleIndexes - * @return - */ - private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, int[] sampleIndexes, boolean skipZeros) - { - //note: general case only because anyway binary search for small samples - - //probe map for distinct items (for value or value groups) - DoubleIntListHashMap distinctVals = new DoubleIntListHashMap(); - - //scan rows and probe/build distinct items - final int m = sampleIndexes.length; - for( int i=0; i<m; i++ ) { - int rowIndex = sampleIndexes[i]; - double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? - rawblock.quickGetValue(colIndex, rowIndex) : - rawblock.quickGetValue(rowIndex, colIndex); - if( val!=0 || !skipZeros ) { - IntArrayList lstPtr = distinctVals.get(val); - if( lstPtr == null ) { - lstPtr = new IntArrayList(); - distinctVals.appendValue(val, lstPtr); - } - lstPtr.appendValue(i); - } - } - - return new UncompressedBitmap(distinctVals); - } - - /** - * - * @param colIndices - * @param rawblock - * @param rowReader - * @return - */ - private static UncompressedBitmap extractBitmap(int[] colIndices, - MatrixBlock rawblock, ReaderColumnSelection rowReader) - { - //probe map for distinct items (for value or value groups) - DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap(); - - //scan rows and probe/build distinct items - DblArray cellVals = null; - while ((cellVals = rowReader.nextRow()) != null) { - IntArrayList lstPtr = distinctVals.get(cellVals); - if (lstPtr == null) { - //create new objects only on demand - lstPtr = new IntArrayList(); - distinctVals.appendValue(new DblArray(cellVals), lstPtr); - } - lstPtr.appendValue(rowReader.getCurrentRowIndex()); - } - - return new UncompressedBitmap(distinctVals, colIndices.length); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.ArrayList; +import java.util.Arrays; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.compress.utils.DblArray; +import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap; +import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap; +import org.apache.sysml.runtime.compress.utils.IntArrayList; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.SparseBlock; + + +/** + * Static functions for encoding bitmaps in various ways. + * + */ +public class BitmapEncoder +{ + /** Size of the blocks used in a blocked bitmap representation. */ + public static final int BITMAP_BLOCK_SZ = 65536; + + /** + * Generate uncompressed bitmaps for a set of columns in an uncompressed + * matrix block. + * + * @param colIndices + * indexes (within the block) of the columns to extract + * @param rawblock + * an uncompressed matrix block; can be dense or sparse + * @return uncompressed bitmap representation of the columns + * @throws DMLRuntimeException + */ + public static UncompressedBitmap extractBitmap(int[] colIndices, MatrixBlock rawblock) + { + //note: no sparse column selection reader because low potential + //single column selection + if( colIndices.length==1 ) { + return extractBitmap(colIndices[0], rawblock, + !CompressedMatrixBlock.MATERIALIZE_ZEROS); + } + //multiple column selection (general case) + else { + ReaderColumnSelection reader = null; + if( rawblock.isInSparseFormat() && CompressedMatrixBlock.TRANSPOSE_INPUT ) + reader = new ReaderColumnSelectionSparse(rawblock, colIndices, + !CompressedMatrixBlock.MATERIALIZE_ZEROS); + else + reader = new ReaderColumnSelectionDense(rawblock, colIndices, + !CompressedMatrixBlock.MATERIALIZE_ZEROS); + + return extractBitmap(colIndices, rawblock, reader); + } + } + + /** + * + * @param colIndices + * @param rawblock + * @param sampleIndexes + * @return + */ + public static UncompressedBitmap extractBitmapFromSample(int[] colIndices, + MatrixBlock rawblock, int[] sampleIndexes) + { + //note: no sparse column selection reader because low potential + + //single column selection + if( colIndices.length==1 ) { + return extractBitmap(colIndices[0], rawblock, sampleIndexes, + !CompressedMatrixBlock.MATERIALIZE_ZEROS); + } + //multiple column selection (general case) + else { + return extractBitmap(colIndices, rawblock, + new ReaderColumnSelectionDenseSample(rawblock, colIndices, + sampleIndexes, !CompressedMatrixBlock.MATERIALIZE_ZEROS)); + } + } + + /** + * Encodes the bitmap as a series of run lengths and offsets. + * <p> + * <b>NOTE: This method must be kept in sync with {@link BitmapDecoderRLE} + * !</b> + * + * @param offsets + * uncompressed contents of the bitmap, expressed as a list of + * the offsets of different bits + * @return compressed version of said bitmap + */ + public static char[] genRLEBitmap(int[] offsets) { + if( offsets.length == 0 ) + return new char[0]; //empty list + + // Use an ArrayList for correctness at the expense of temp space + ArrayList<Character> buf = new ArrayList<Character>(); + + // 1 + (position of last 1 in the previous run of 1's) + // We add 1 because runs may be of length zero. + int lastRunEnd = 0; + + // Offset between the end of the previous run of 1's and the first 1 in + // the current run. Initialized below. + int curRunOff; + + // Length of the most recent run of 1's + int curRunLen = 0; + + // Current encoding is as follows: + // Negative entry: abs(Entry) encodes the offset to the next lone 1 bit. + // Positive entry: Entry encodes offset to next run of 1's. The next + // entry in the bitmap holds a run length. + + // Special-case the first run to simplify the loop below. + int firstOff = offsets[0]; + + // The first run may start more than a short's worth of bits in + while (firstOff > Character.MAX_VALUE) { + buf.add(Character.MAX_VALUE); + buf.add((char) 0); + firstOff -= Character.MAX_VALUE; + lastRunEnd += Character.MAX_VALUE; + } + + // Create the first run with an initial size of 1 + curRunOff = firstOff; + curRunLen = 1; + + // Process the remaining offsets + for (int i = 1; i < offsets.length; i++) { + + int absOffset = offsets[i]; + + // 1 + (last position in run) + int curRunEnd = lastRunEnd + curRunOff + curRunLen; + + if (absOffset > curRunEnd || curRunLen >= Character.MAX_VALUE) { + // End of a run, either because we hit a run of 0's or because the + // number of 1's won't fit in 16 bits. Add run to bitmap and start a new one. + buf.add((char) curRunOff); + buf.add((char) curRunLen); + + lastRunEnd = curRunEnd; + curRunOff = absOffset - lastRunEnd; + + while (curRunOff > Character.MAX_VALUE) { + // SPECIAL CASE: Offset to next run doesn't fit into 16 bits. + // Add zero-length runs until the offset is small enough. + buf.add(Character.MAX_VALUE); + buf.add((char) 0); + lastRunEnd += Character.MAX_VALUE; + curRunOff -= Character.MAX_VALUE; + } + + curRunLen = 1; + } else { + // Middle of a run + curRunLen++; + } + } + + // Close out the last run + if (curRunLen >= 1) { + buf.add((char) curRunOff); + buf.add((char) curRunLen); + } + + // Convert wasteful ArrayList to packed array. + char[] ret = new char[buf.size()]; + for (int i = 0; i < buf.size(); i++) { + ret[i] = buf.get(i); + } + return ret; + } + + /** + * Encodes the bitmap in blocks of offsets. Within each block, the bits are + * stored as absolute offsets from the start of the block. + * + * @param offsets + * uncompressed contents of the bitmap, expressed as a list of + * the offsets of different bits + * @return compressed version of said bitmap + */ + public static char[] genOffsetBitmap(int[] offsets) + { + int lastOffset = offsets[offsets.length - 1]; + + // Build up the blocks + int numBlocks = (lastOffset / BITMAP_BLOCK_SZ) + 1; + // To simplify the logic, we make two passes. + // The first pass divides the offsets by block. + int[] blockLengths = new int[numBlocks]; + Arrays.fill(blockLengths, 0); + + for (int ix = 0; ix < offsets.length; ix++) { + int val = offsets[ix]; + int blockForVal = val / BITMAP_BLOCK_SZ; + + blockLengths[blockForVal]++; + } + + // The second pass creates the blocks. + int totalSize = numBlocks; + for (int block = 0; block < numBlocks; block++) { + totalSize += blockLengths[block]; + } + char[] encodedBlocks = new char[totalSize]; + + int inputIx = 0; + int blockStartIx = 0; + for (int block = 0; block < numBlocks; block++) { + int blockSz = blockLengths[block]; + + // First entry in the block is number of bits + encodedBlocks[blockStartIx] = (char) blockSz; + + for (int i = 0; i < blockSz; i++) { + encodedBlocks[blockStartIx + i + 1] = (char) + (offsets[inputIx+i] % BITMAP_BLOCK_SZ); + } + + inputIx += blockSz; + blockStartIx += blockSz + 1; + } + + return encodedBlocks; + } + + /** + * + * @param colIndex + * @param rawblock + * @return + * @throws DMLRuntimeException + */ + private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, boolean skipZeros) + { + //probe map for distinct items (for value or value groups) + DoubleIntListHashMap distinctVals = new DoubleIntListHashMap(); + + //scan rows and probe/build distinct items + final int m = CompressedMatrixBlock.TRANSPOSE_INPUT ? + rawblock.getNumColumns():rawblock.getNumRows(); + + if( rawblock.isInSparseFormat() //SPARSE + && CompressedMatrixBlock.TRANSPOSE_INPUT ) + { + SparseBlock a = rawblock.getSparseBlock(); + if( a != null && !a.isEmpty(colIndex) ) + { + int apos = a.pos(colIndex); + int alen = a.size(colIndex); + int[] aix = a.indexes(colIndex); + double[] avals = a.values(colIndex); + + IntArrayList lstPtr0 = new IntArrayList(); //for 0 values + int last = -1; + //iterate over non-zero entries but fill in zeros + for( int j=apos; j<apos+alen; j++ ) + { + //fill in zero values + if( !skipZeros ) + for( int k=last+1; k<aix[j]; k++ ) + lstPtr0.appendValue(k); + //handle non-zero value + IntArrayList lstPtr = distinctVals.get(avals[j]); + if( lstPtr == null ) { + lstPtr = new IntArrayList(); + distinctVals.appendValue(avals[j], lstPtr); + } + lstPtr.appendValue(aix[j]); + last = aix[j]; + } + //fill in remaining zero values + if( !skipZeros ) { + for( int k=last+1; k<m; k++ ) + lstPtr0.appendValue(k); + if( lstPtr0.size()>0 ) + distinctVals.appendValue(0, lstPtr0); + } + } + else if( !skipZeros ) { //full 0 column + IntArrayList lstPtr = new IntArrayList(); + for( int i=0; i<m; i++ ) + lstPtr.appendValue(i); + distinctVals.appendValue(0, lstPtr); + } + } + else //GENERAL CASE + { + for( int i=0; i<m; i++ ) { + double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? + rawblock.quickGetValue(colIndex, i): + rawblock.quickGetValue(i, colIndex); + if( val!=0 || !skipZeros ) { + IntArrayList lstPtr = distinctVals.get(val); + if( lstPtr == null ) { + lstPtr = new IntArrayList(); + distinctVals.appendValue(val, lstPtr); + } + lstPtr.appendValue(i); + } + } + } + + return new UncompressedBitmap(distinctVals); + } + + /** + * + * @param colIndex + * @param rawblock + * @param sampleIndexes + * @return + */ + private static UncompressedBitmap extractBitmap(int colIndex, MatrixBlock rawblock, int[] sampleIndexes, boolean skipZeros) + { + //note: general case only because anyway binary search for small samples + + //probe map for distinct items (for value or value groups) + DoubleIntListHashMap distinctVals = new DoubleIntListHashMap(); + + //scan rows and probe/build distinct items + final int m = sampleIndexes.length; + for( int i=0; i<m; i++ ) { + int rowIndex = sampleIndexes[i]; + double val = CompressedMatrixBlock.TRANSPOSE_INPUT ? + rawblock.quickGetValue(colIndex, rowIndex) : + rawblock.quickGetValue(rowIndex, colIndex); + if( val!=0 || !skipZeros ) { + IntArrayList lstPtr = distinctVals.get(val); + if( lstPtr == null ) { + lstPtr = new IntArrayList(); + distinctVals.appendValue(val, lstPtr); + } + lstPtr.appendValue(i); + } + } + + return new UncompressedBitmap(distinctVals); + } + + /** + * + * @param colIndices + * @param rawblock + * @param rowReader + * @return + */ + private static UncompressedBitmap extractBitmap(int[] colIndices, + MatrixBlock rawblock, ReaderColumnSelection rowReader) + { + //probe map for distinct items (for value or value groups) + DblArrayIntListHashMap distinctVals = new DblArrayIntListHashMap(); + + //scan rows and probe/build distinct items + DblArray cellVals = null; + while ((cellVals = rowReader.nextRow()) != null) { + IntArrayList lstPtr = distinctVals.get(cellVals); + if (lstPtr == null) { + //create new objects only on demand + lstPtr = new IntArrayList(); + distinctVals.appendValue(new DblArray(cellVals), lstPtr); + } + lstPtr.appendValue(rowReader.getCurrentRowIndex()); + } + + return new UncompressedBitmap(distinctVals, colIndices.length); + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java index 647a701..b1072d1 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroup.java @@ -1,259 +1,259 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.io.Serializable; -import java.util.List; - -import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; -import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator; -import org.apache.sysml.runtime.matrix.operators.ScalarOperator; - -/** - * Class that stores information about a column group within a compressed matrix - * block. There are subclasses specific to each compression type. - * - */ -public abstract class ColGroup implements Serializable -{ - private static final long serialVersionUID = 2439785418908671481L; - - public enum CompressionType { - UNCOMPRESSED, //uncompressed sparse/dense - RLE_BITMAP, //RLE bitmap - OLE_BITMAP; //OLE bitmap - } - - /** - * Offsets of the columns that make up the column group. Zero-based, and - * relative to the matrix block. - */ - protected int[] _colIndexes; - - /** Number of rows in the matrix, for use by child classes. */ - protected int _numRows; - - /** How the elements of the column group are compressed. */ - private CompressionType _compType; - - - /** - * Main constructor. - * - * @param colIndices - * offsets of the columns in the matrix block that make up the - * group - * @param numRows - * total number of rows in the parent block - */ - protected ColGroup(CompressionType type, int[] colIndices, int numRows) { - _compType = type; - _colIndexes = colIndices; - _numRows = numRows; - } - - /** Convenience constructor for converting indices to a more compact format. */ - protected ColGroup(CompressionType type, List<Integer> colIndicesList, int numRows) { - _compType = type; - _colIndexes = new int[colIndicesList.size()]; - int i = 0; - for (Integer index : colIndicesList) - _colIndexes[i++] = index; - } - - /** - * @return offsets of the columns in the matrix block that make up the group - */ - public int[] getColIndices() { - return _colIndexes; - } - - /** - * @param col - * an index from 0 to the number of columns in this group - 1 - * @return offset of the specified column in the matrix block that make up - * the group - */ - public int getColIndex(int colNum) { - return _colIndexes[colNum]; - } - - public int getNumRows() { - return _numRows; - } - - /** - * @return number of columns in this column group - */ - public int getNumCols() { - return _colIndexes.length; - } - - /** - * @return How the elements of the column group are compressed. - */ - public CompressionType getCompType() { - return _compType; - } - - /** - * - * @param offset - */ - public void shiftColIndices(int offset) { - for( int i=0; i<_colIndexes.length; i++ ) - _colIndexes[i] += offset; - } - - /** - * Note: Must be overridden by child classes to account for additional data - * and metadata - * - * @return an upper bound on the number of bytes used to store this ColGroup - * in memory. - */ - public long estimateInMemorySize() { - // int numRows (4B) , array reference colIndices (8B) + array object - // overhead if exists (32B) + 4B per element, CompressionType compType - // (2 booleans 2B + enum overhead 32B + reference to enum 8B) - long size = 54; - if (_colIndexes == null) - return size; - else - return size + 32 + 4 * _colIndexes.length; - } - - /** - * Decompress the contents of this column group into the specified full - * matrix block. - * - * @param target - * a matrix block where the columns covered by this column group - * have not yet been filled in. - */ - public abstract void decompressToBlock(MatrixBlock target); - - /** - * Decompress the contents of this column group into uncompressed packed - * columns - * - * @param target - * a dense matrix block. The block must have enough space to hold - * the contents of this column group. - * @param colIndexTargets - * array that maps column indices in the original matrix block to - * columns of target. - */ - public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets); - - /** - * - * @param target dense output vector - * @param colpos column to decompress, error if larger or equal numCols - */ - public abstract void decompressToBlock(MatrixBlock target, int colpos); - - - /** - * Serializes column group to data output. - * - * @param out - * @throws IOException - */ - public abstract void write(DataOutput out) - throws IOException; - - /** - * Deserializes column group from data input. - * - * @param in - * @throws IOException - */ - public abstract void readFields(DataInput in) - throws IOException; - - - /** - * Returns the exact serialized size of column group. - * This can be used for example for buffer preallocation. - * - * @return - */ - public abstract long getExactSizeOnDisk(); - - /** - * Multiply the slice of the matrix that this column group represents by a - * vector on the right. - * - * @param vector - * vector to multiply by (tall vector) - * @param result - * accumulator for holding the result - * @throws DMLRuntimeException - * if the internal SystemML code that performs the - * multiplication experiences an error - */ - public abstract void rightMultByVector(MatrixBlock vector, - MatrixBlock result, int rl, int ru) throws DMLRuntimeException; - - - /** - * Multiply the slice of the matrix that this column group represents by a - * row vector on the left (the original column vector is assumed to be - * transposed already i.e. its size now is 1xn). - * - * @param vector - * @param result - * @throws DMLRuntimeException - */ - public abstract void leftMultByRowVector(MatrixBlock vector, - MatrixBlock result) throws DMLRuntimeException; - - /** - * Perform the specified scalar operation directly on the compressed column - * group, without decompressing individual cells if possible. - * - * @param op - * operation to perform - * @return version of this column group with the operation applied - */ - public abstract ColGroup scalarOperation(ScalarOperator op) - throws DMLRuntimeException; - - /** - * - * @param op - * @param result - * @throws DMLUnsupportedOperationException - * @throws DMLRuntimeException - */ - public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) - throws DMLRuntimeException; - - /** - * - * @param rnnz - */ - protected abstract void countNonZerosPerRow(int[] rnnz); -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.io.Serializable; +import java.util.List; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.operators.AggregateUnaryOperator; +import org.apache.sysml.runtime.matrix.operators.ScalarOperator; + +/** + * Class that stores information about a column group within a compressed matrix + * block. There are subclasses specific to each compression type. + * + */ +public abstract class ColGroup implements Serializable +{ + private static final long serialVersionUID = 2439785418908671481L; + + public enum CompressionType { + UNCOMPRESSED, //uncompressed sparse/dense + RLE_BITMAP, //RLE bitmap + OLE_BITMAP; //OLE bitmap + } + + /** + * Offsets of the columns that make up the column group. Zero-based, and + * relative to the matrix block. + */ + protected int[] _colIndexes; + + /** Number of rows in the matrix, for use by child classes. */ + protected int _numRows; + + /** How the elements of the column group are compressed. */ + private CompressionType _compType; + + + /** + * Main constructor. + * + * @param colIndices + * offsets of the columns in the matrix block that make up the + * group + * @param numRows + * total number of rows in the parent block + */ + protected ColGroup(CompressionType type, int[] colIndices, int numRows) { + _compType = type; + _colIndexes = colIndices; + _numRows = numRows; + } + + /** Convenience constructor for converting indices to a more compact format. */ + protected ColGroup(CompressionType type, List<Integer> colIndicesList, int numRows) { + _compType = type; + _colIndexes = new int[colIndicesList.size()]; + int i = 0; + for (Integer index : colIndicesList) + _colIndexes[i++] = index; + } + + /** + * @return offsets of the columns in the matrix block that make up the group + */ + public int[] getColIndices() { + return _colIndexes; + } + + /** + * @param col + * an index from 0 to the number of columns in this group - 1 + * @return offset of the specified column in the matrix block that make up + * the group + */ + public int getColIndex(int colNum) { + return _colIndexes[colNum]; + } + + public int getNumRows() { + return _numRows; + } + + /** + * @return number of columns in this column group + */ + public int getNumCols() { + return _colIndexes.length; + } + + /** + * @return How the elements of the column group are compressed. + */ + public CompressionType getCompType() { + return _compType; + } + + /** + * + * @param offset + */ + public void shiftColIndices(int offset) { + for( int i=0; i<_colIndexes.length; i++ ) + _colIndexes[i] += offset; + } + + /** + * Note: Must be overridden by child classes to account for additional data + * and metadata + * + * @return an upper bound on the number of bytes used to store this ColGroup + * in memory. + */ + public long estimateInMemorySize() { + // int numRows (4B) , array reference colIndices (8B) + array object + // overhead if exists (32B) + 4B per element, CompressionType compType + // (2 booleans 2B + enum overhead 32B + reference to enum 8B) + long size = 54; + if (_colIndexes == null) + return size; + else + return size + 32 + 4 * _colIndexes.length; + } + + /** + * Decompress the contents of this column group into the specified full + * matrix block. + * + * @param target + * a matrix block where the columns covered by this column group + * have not yet been filled in. + */ + public abstract void decompressToBlock(MatrixBlock target); + + /** + * Decompress the contents of this column group into uncompressed packed + * columns + * + * @param target + * a dense matrix block. The block must have enough space to hold + * the contents of this column group. + * @param colIndexTargets + * array that maps column indices in the original matrix block to + * columns of target. + */ + public abstract void decompressToBlock(MatrixBlock target, int[] colIndexTargets); + + /** + * + * @param target dense output vector + * @param colpos column to decompress, error if larger or equal numCols + */ + public abstract void decompressToBlock(MatrixBlock target, int colpos); + + + /** + * Serializes column group to data output. + * + * @param out + * @throws IOException + */ + public abstract void write(DataOutput out) + throws IOException; + + /** + * Deserializes column group from data input. + * + * @param in + * @throws IOException + */ + public abstract void readFields(DataInput in) + throws IOException; + + + /** + * Returns the exact serialized size of column group. + * This can be used for example for buffer preallocation. + * + * @return + */ + public abstract long getExactSizeOnDisk(); + + /** + * Multiply the slice of the matrix that this column group represents by a + * vector on the right. + * + * @param vector + * vector to multiply by (tall vector) + * @param result + * accumulator for holding the result + * @throws DMLRuntimeException + * if the internal SystemML code that performs the + * multiplication experiences an error + */ + public abstract void rightMultByVector(MatrixBlock vector, + MatrixBlock result, int rl, int ru) throws DMLRuntimeException; + + + /** + * Multiply the slice of the matrix that this column group represents by a + * row vector on the left (the original column vector is assumed to be + * transposed already i.e. its size now is 1xn). + * + * @param vector + * @param result + * @throws DMLRuntimeException + */ + public abstract void leftMultByRowVector(MatrixBlock vector, + MatrixBlock result) throws DMLRuntimeException; + + /** + * Perform the specified scalar operation directly on the compressed column + * group, without decompressing individual cells if possible. + * + * @param op + * operation to perform + * @return version of this column group with the operation applied + */ + public abstract ColGroup scalarOperation(ScalarOperator op) + throws DMLRuntimeException; + + /** + * + * @param op + * @param result + * @throws DMLUnsupportedOperationException + * @throws DMLRuntimeException + */ + public abstract void unaryAggregateOperations(AggregateUnaryOperator op, MatrixBlock result) + throws DMLRuntimeException; + + /** + * + * @param rnnz + */ + protected abstract void countNonZerosPerRow(int[] rnnz); +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java index 5d60a49..3455cc7 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ColGroupBitmap.java @@ -1,498 +1,498 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import java.io.DataInput; -import java.io.DataOutput; -import java.io.IOException; -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Iterator; -import java.util.TreeMap; -import java.util.Map.Entry; - -import org.apache.sysml.runtime.DMLRuntimeException; -import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; -import org.apache.sysml.runtime.matrix.operators.ScalarOperator; - - -/** - * Base class for column groups encoded with various types of bitmap encoding. - * - * - * NOTES: - * * OLE: separate storage segment length and bitmaps led to a 30% improvement - * but not applied because more difficult to support both data layouts at the - * same time (distributed/local as well as w/ and w/o low-level opt) - */ -public abstract class ColGroupBitmap extends ColGroup -{ - private static final long serialVersionUID = -1635828933479403125L; - - public static final boolean LOW_LEVEL_OPT = true; - //sorting of values by physical length helps by 10-20%, especially for serial, while - //slight performance decrease for parallel incl multi-threaded, hence not applied for - //distributed operations (also because compression time + garbage collection increases) - private static final boolean SORT_VALUES_BY_LENGTH = true; - protected static final boolean CREATE_SKIPLIST = true; - - protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ; - protected static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ; - - /** Distinct values associated with individual bitmaps. */ - protected double[] _values; //linearized <numcol vals> <numcol vals> - - /** Bitmaps, one per uncompressed value in {@link #values}. */ - protected int[] _ptr; //bitmap offsets per value - protected char[] _data; //linearized bitmaps (variable length) - - protected int[] _skiplist; - - public ColGroupBitmap(CompressionType type) { - super(type, (int[]) null, -1); - } - - /** - * Main constructor. Stores the headers for the individual bitmaps. - * - * @param colIndices - * indices (within the block) of the columns included in this - * column - * @param numRows - * total number of rows in the parent block - * @param ubm - * Uncompressed bitmap representation of the block - */ - public ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, UncompressedBitmap ubm) - { - super(type, colIndices, numRows); - - // Extract and store just the distinct values. The bitmaps themselves go - // into the subclasses. - final int numCols = ubm.getNumColumns(); - final int numVals = ubm.getNumValues(); - - _values = new double[numVals*numCols]; - for (int i=0; i<numVals; i++) { - //note: deep copied internally on getValues - double[] tmp = ubm.getValues(i); - System.arraycopy(tmp, 0, _values, i*numCols, numCols); - } - } - - /** - * Constructor for subclass methods that need to create shallow copies - * - * @param colIndices - * raw column index information - * @param numRows - * number of rows in the block - * @param values - * set of distinct values for the block (associated bitmaps are - * kept in the subclass) - */ - protected ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, double[] values) { - super(type, colIndices, numRows); - _values = values; - } - - protected final int len(int k) { - return _ptr[k+1] - _ptr[k]; - } - - /** - * - * @param numVals - * @param totalLen - * @param lbitmaps - */ - protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps) - { - // compact bitmaps to linearized representation - if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH - && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) - { - // sort value by num segments in descending order - TreeMap<Integer,ArrayList<Integer>> tree = new TreeMap<Integer, ArrayList<Integer>>(); - for( int i=0; i<numVals; i++ ) { - int revlen = totalLen-lbitmaps[i].length; - if( !tree.containsKey(revlen) ) - tree.put(revlen, new ArrayList<Integer>()); - tree.get(revlen).add(i); - } - - // compact bitmaps to linearized representation - _ptr = new int[numVals+1]; - _data = new char[totalLen]; - int pos = 0, off = 0; - for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) { - for( Integer tmpix : e.getValue() ) { - int len = lbitmaps[tmpix].length; - _ptr[pos] = off; - System.arraycopy(lbitmaps[tmpix], 0, _data, off, len); - off += len; - pos++; - } - } - _ptr[numVals] = totalLen; - - // reorder values - double[] lvalues = new double[_values.length]; - int off2 = 0; int numCols = _colIndexes.length; - for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) { - for( Integer tmpix : e.getValue() ) { - System.arraycopy(_values, tmpix*numCols, lvalues, off2, numCols); - off2 += numCols; - } - } - _values = lvalues; - } - else - { - // compact bitmaps to linearized representation - _ptr = new int[numVals+1]; - _data = new char[totalLen]; - for( int i=0, off=0; i<numVals; i++ ) { - int len = lbitmaps[i].length; - _ptr[i] = off; - System.arraycopy(lbitmaps[i], 0, _data, off, len); - off += len; - } - _ptr[numVals] = totalLen; - } - } - - @Override - public long estimateInMemorySize() { - long size = super.estimateInMemorySize(); - - // adding the size of values - size += 8; //array reference - if (_values != null) { - size += 32 + _values.length * 8; //values - } - - // adding bitmaps size - size += 16; //array references - if (_data != null) { - size += 32 + _ptr.length * 4; // offsets - size += 32 + _data.length * 2; // bitmaps - } - - return size; - } - - //generic decompression for OLE/RLE, to be overwritten for performance - @Override - public void decompressToBlock(MatrixBlock target) - { - final int numCols = getNumCols(); - final int numVals = getNumValues(); - int[] colIndices = getColIndices(); - - // Run through the bitmaps for this column group - for (int i = 0; i < numVals; i++) { - Iterator<Integer> decoder = getDecodeIterator(i); - int valOff = i*numCols; - - while (decoder.hasNext()) { - int row = decoder.next(); - for (int colIx = 0; colIx < numCols; colIx++) { - target.appendValue(row, colIndices[colIx], _values[valOff+colIx]); - } - } - } - } - - //generic decompression for OLE/RLE, to be overwritten for performance - @Override - public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) - { - final int numCols = getNumCols(); - final int numVals = getNumValues(); - - // Run through the bitmaps for this column group - for (int i = 0; i < numVals; i++) { - Iterator<Integer> decoder = getDecodeIterator(i); - int valOff = i*numCols; - - while (decoder.hasNext()) { - int row = decoder.next(); - for (int colIx = 0; colIx < numCols; colIx++) { - int origMatrixColIx = getColIndex(colIx); - int targetColIx = colIndexTargets[origMatrixColIx]; - target.quickSetValue(row, targetColIx, _values[valOff+colIx]); - } - } - } - } - - //generic decompression for OLE/RLE, to be overwritten for performance - @Override - public void decompressToBlock(MatrixBlock target, int colpos) - { - final int numCols = getNumCols(); - final int numVals = getNumValues(); - - // Run through the bitmaps for this column group - for (int i = 0; i < numVals; i++) { - Iterator<Integer> decoder = getDecodeIterator(i); - int valOff = i*numCols; - - while (decoder.hasNext()) { - int row = decoder.next(); - target.quickSetValue(row, 0, _values[valOff+colpos]); - } - } - } - - /** - * - * @param bitmapIx - * @return - */ - protected double sumValues(int bitmapIx) - { - final int numCols = getNumCols(); - final int valOff = bitmapIx * numCols; - - double val = 0.0; - for( int i = 0; i < numCols; i++ ) { - val += _values[valOff+i]; - } - - return val; - } - - protected double sumValues(int bitmapIx, double[] b) - { - final int numCols = getNumCols(); - final int valOff = bitmapIx * numCols; - - double val = 0; - for( int i = 0; i < numCols; i++ ) { - val += _values[valOff+i] * b[i]; - } - - return val; - } - - /** - * - * @param b - * @param c - */ - protected void sumAllValues(double[] b, double[] c) - { - final int numVals = getNumValues(); - final int numCols = getNumCols(); - - //vectMultiplyAdd over cols instead of dotProduct over vals because - //usually more values than columns - for( int i=0, off=0; i<numCols; i++, off+=numVals ) - LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals); - } - - /** - * Method for use by subclasses. Applies a scalar operation to the value - * metadata stored in the superclass. - * - * @param op - * scalar operation to perform - * @return transformed copy of value metadata for this column group - * @throws DMLRuntimeException - */ - protected double[] applyScalarOp(ScalarOperator op) - throws DMLRuntimeException - { - //scan over linearized values - double[] ret = new double[_values.length]; - for (int i = 0; i < _values.length; i++) { - ret[i] = op.executeScalar(_values[i]); - } - - return ret; - } - - /** - * - * @param op - * @param newVal - * @param numCols - * @return - * @throws DMLRuntimeException - */ - protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols) - throws DMLRuntimeException - { - //scan over linearized values - double[] ret = new double[_values.length + numCols]; - for( int i = 0; i < _values.length; i++ ) { - ret[i] = op.executeScalar(_values[i]); - } - - //add new value to the end - Arrays.fill(ret, _values.length, _values.length+numCols, newVal); - - return ret; - } - - /** - * @return the number of distinct sets of values associated with the bitmaps - * in this column group - */ - public int getNumValues() { - return _values.length / _colIndexes.length; - } - - /** - * - * @return - */ - public double[] getValues() { - return _values; - } - - /** - * - * @return - */ - public char[] getBitmaps() { - return _data; - } - - public int[] getBitmapOffsets() { - return _ptr; - } - - /** - * @param bmpIx - * index of a specific compressed bitmap (stored in subclass, - * index same as {@link #values}) - * @return an object for iterating over the row offsets in this bitmap. Only - * valid until the next call to this method. May be reused across - * calls. - */ - public abstract Iterator<Integer> getDecodeIterator(int bmpIx); - - /** - * Utility function of sparse-unsafe operations. - * - * @param ind - * @return - * @throws DMLRuntimeException - */ - protected int[] computeOffsets(boolean[] ind) - throws DMLRuntimeException - { - //determine number of offsets - int numOffsets = 0; - for( int i=0; i<ind.length; i++ ) - numOffsets += ind[i] ? 1 : 0; - - //create offset lists - int[] ret = new int[numOffsets]; - for( int i=0, pos=0; i<ind.length; i++ ) - if( ind[i] ) - ret[pos++] = i; - - return ret; - } - - @Override - public void readFields(DataInput in) - throws IOException - { - _numRows = in.readInt(); - int numCols = in.readInt(); - int numVals = in.readInt(); - - //read col indices - _colIndexes = new int[ numCols ]; - for( int i=0; i<numCols; i++ ) - _colIndexes[i] = in.readInt(); - - //read distinct values - _values = new double[numVals*numCols]; - for( int i=0; i<numVals*numCols; i++ ) - _values[i] = in.readDouble(); - - //read bitmaps - int totalLen = in.readInt(); - _ptr = new int[numVals+1]; - _data = new char[totalLen]; - for( int i=0, off=0; i<numVals; i++ ) { - int len = in.readInt(); - _ptr[i] = off; - for( int j=0; j<len; j++ ) - _data[off+j] = in.readChar(); - off += len; - } - _ptr[numVals] = totalLen; - } - - @Override - public void write(DataOutput out) - throws IOException - { - int numCols = getNumCols(); - int numVals = getNumValues(); - out.writeInt(_numRows); - out.writeInt(numCols); - out.writeInt(numVals); - - //write col indices - for( int i=0; i<_colIndexes.length; i++ ) - out.writeInt( _colIndexes[i] ); - - //write distinct values - for( int i=0; i<_values.length; i++ ) - out.writeDouble(_values[i]); - - //write bitmaps (lens and data, offset later recreated) - int totalLen = 0; - for( int i=0; i<numVals; i++ ) - totalLen += len(i); - out.writeInt(totalLen); - for( int i=0; i<numVals; i++ ) { - int len = len(i); - int off = _ptr[i]; - out.writeInt(len); - for( int j=0; j<len; j++ ) - out.writeChar(_data[off+j]); - } - } - - @Override - public long getExactSizeOnDisk() { - long ret = 12; //header - //col indices - ret += 4 * _colIndexes.length; - //distinct values (groups of values) - ret += 8 * _values.length; - //actual bitmaps - ret += 4; //total length - for( int i=0; i<getNumValues(); i++ ) - ret += 4 + 2 * len(i); - - return ret; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.io.DataInput; +import java.io.DataOutput; +import java.io.IOException; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Iterator; +import java.util.TreeMap; +import java.util.Map.Entry; + +import org.apache.sysml.runtime.DMLRuntimeException; +import org.apache.sysml.runtime.compress.utils.LinearAlgebraUtils; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.operators.ScalarOperator; + + +/** + * Base class for column groups encoded with various types of bitmap encoding. + * + * + * NOTES: + * * OLE: separate storage segment length and bitmaps led to a 30% improvement + * but not applied because more difficult to support both data layouts at the + * same time (distributed/local as well as w/ and w/o low-level opt) + */ +public abstract class ColGroupBitmap extends ColGroup +{ + private static final long serialVersionUID = -1635828933479403125L; + + public static final boolean LOW_LEVEL_OPT = true; + //sorting of values by physical length helps by 10-20%, especially for serial, while + //slight performance decrease for parallel incl multi-threaded, hence not applied for + //distributed operations (also because compression time + garbage collection increases) + private static final boolean SORT_VALUES_BY_LENGTH = true; + protected static final boolean CREATE_SKIPLIST = true; + + protected static final int READ_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ; + protected static final int WRITE_CACHE_BLKSZ = 2 * BitmapEncoder.BITMAP_BLOCK_SZ; + + /** Distinct values associated with individual bitmaps. */ + protected double[] _values; //linearized <numcol vals> <numcol vals> + + /** Bitmaps, one per uncompressed value in {@link #values}. */ + protected int[] _ptr; //bitmap offsets per value + protected char[] _data; //linearized bitmaps (variable length) + + protected int[] _skiplist; + + public ColGroupBitmap(CompressionType type) { + super(type, (int[]) null, -1); + } + + /** + * Main constructor. Stores the headers for the individual bitmaps. + * + * @param colIndices + * indices (within the block) of the columns included in this + * column + * @param numRows + * total number of rows in the parent block + * @param ubm + * Uncompressed bitmap representation of the block + */ + public ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, UncompressedBitmap ubm) + { + super(type, colIndices, numRows); + + // Extract and store just the distinct values. The bitmaps themselves go + // into the subclasses. + final int numCols = ubm.getNumColumns(); + final int numVals = ubm.getNumValues(); + + _values = new double[numVals*numCols]; + for (int i=0; i<numVals; i++) { + //note: deep copied internally on getValues + double[] tmp = ubm.getValues(i); + System.arraycopy(tmp, 0, _values, i*numCols, numCols); + } + } + + /** + * Constructor for subclass methods that need to create shallow copies + * + * @param colIndices + * raw column index information + * @param numRows + * number of rows in the block + * @param values + * set of distinct values for the block (associated bitmaps are + * kept in the subclass) + */ + protected ColGroupBitmap(CompressionType type, int[] colIndices, int numRows, double[] values) { + super(type, colIndices, numRows); + _values = values; + } + + protected final int len(int k) { + return _ptr[k+1] - _ptr[k]; + } + + /** + * + * @param numVals + * @param totalLen + * @param lbitmaps + */ + protected void createCompressedBitmaps(int numVals, int totalLen, char[][] lbitmaps) + { + // compact bitmaps to linearized representation + if( LOW_LEVEL_OPT && SORT_VALUES_BY_LENGTH + && _numRows > BitmapEncoder.BITMAP_BLOCK_SZ ) + { + // sort value by num segments in descending order + TreeMap<Integer,ArrayList<Integer>> tree = new TreeMap<Integer, ArrayList<Integer>>(); + for( int i=0; i<numVals; i++ ) { + int revlen = totalLen-lbitmaps[i].length; + if( !tree.containsKey(revlen) ) + tree.put(revlen, new ArrayList<Integer>()); + tree.get(revlen).add(i); + } + + // compact bitmaps to linearized representation + _ptr = new int[numVals+1]; + _data = new char[totalLen]; + int pos = 0, off = 0; + for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) { + for( Integer tmpix : e.getValue() ) { + int len = lbitmaps[tmpix].length; + _ptr[pos] = off; + System.arraycopy(lbitmaps[tmpix], 0, _data, off, len); + off += len; + pos++; + } + } + _ptr[numVals] = totalLen; + + // reorder values + double[] lvalues = new double[_values.length]; + int off2 = 0; int numCols = _colIndexes.length; + for( Entry<Integer,ArrayList<Integer>> e : tree.entrySet() ) { + for( Integer tmpix : e.getValue() ) { + System.arraycopy(_values, tmpix*numCols, lvalues, off2, numCols); + off2 += numCols; + } + } + _values = lvalues; + } + else + { + // compact bitmaps to linearized representation + _ptr = new int[numVals+1]; + _data = new char[totalLen]; + for( int i=0, off=0; i<numVals; i++ ) { + int len = lbitmaps[i].length; + _ptr[i] = off; + System.arraycopy(lbitmaps[i], 0, _data, off, len); + off += len; + } + _ptr[numVals] = totalLen; + } + } + + @Override + public long estimateInMemorySize() { + long size = super.estimateInMemorySize(); + + // adding the size of values + size += 8; //array reference + if (_values != null) { + size += 32 + _values.length * 8; //values + } + + // adding bitmaps size + size += 16; //array references + if (_data != null) { + size += 32 + _ptr.length * 4; // offsets + size += 32 + _data.length * 2; // bitmaps + } + + return size; + } + + //generic decompression for OLE/RLE, to be overwritten for performance + @Override + public void decompressToBlock(MatrixBlock target) + { + final int numCols = getNumCols(); + final int numVals = getNumValues(); + int[] colIndices = getColIndices(); + + // Run through the bitmaps for this column group + for (int i = 0; i < numVals; i++) { + Iterator<Integer> decoder = getDecodeIterator(i); + int valOff = i*numCols; + + while (decoder.hasNext()) { + int row = decoder.next(); + for (int colIx = 0; colIx < numCols; colIx++) { + target.appendValue(row, colIndices[colIx], _values[valOff+colIx]); + } + } + } + } + + //generic decompression for OLE/RLE, to be overwritten for performance + @Override + public void decompressToBlock(MatrixBlock target, int[] colIndexTargets) + { + final int numCols = getNumCols(); + final int numVals = getNumValues(); + + // Run through the bitmaps for this column group + for (int i = 0; i < numVals; i++) { + Iterator<Integer> decoder = getDecodeIterator(i); + int valOff = i*numCols; + + while (decoder.hasNext()) { + int row = decoder.next(); + for (int colIx = 0; colIx < numCols; colIx++) { + int origMatrixColIx = getColIndex(colIx); + int targetColIx = colIndexTargets[origMatrixColIx]; + target.quickSetValue(row, targetColIx, _values[valOff+colIx]); + } + } + } + } + + //generic decompression for OLE/RLE, to be overwritten for performance + @Override + public void decompressToBlock(MatrixBlock target, int colpos) + { + final int numCols = getNumCols(); + final int numVals = getNumValues(); + + // Run through the bitmaps for this column group + for (int i = 0; i < numVals; i++) { + Iterator<Integer> decoder = getDecodeIterator(i); + int valOff = i*numCols; + + while (decoder.hasNext()) { + int row = decoder.next(); + target.quickSetValue(row, 0, _values[valOff+colpos]); + } + } + } + + /** + * + * @param bitmapIx + * @return + */ + protected double sumValues(int bitmapIx) + { + final int numCols = getNumCols(); + final int valOff = bitmapIx * numCols; + + double val = 0.0; + for( int i = 0; i < numCols; i++ ) { + val += _values[valOff+i]; + } + + return val; + } + + protected double sumValues(int bitmapIx, double[] b) + { + final int numCols = getNumCols(); + final int valOff = bitmapIx * numCols; + + double val = 0; + for( int i = 0; i < numCols; i++ ) { + val += _values[valOff+i] * b[i]; + } + + return val; + } + + /** + * + * @param b + * @param c + */ + protected void sumAllValues(double[] b, double[] c) + { + final int numVals = getNumValues(); + final int numCols = getNumCols(); + + //vectMultiplyAdd over cols instead of dotProduct over vals because + //usually more values than columns + for( int i=0, off=0; i<numCols; i++, off+=numVals ) + LinearAlgebraUtils.vectMultiplyAdd(b[i], _values, c, off, 0, numVals); + } + + /** + * Method for use by subclasses. Applies a scalar operation to the value + * metadata stored in the superclass. + * + * @param op + * scalar operation to perform + * @return transformed copy of value metadata for this column group + * @throws DMLRuntimeException + */ + protected double[] applyScalarOp(ScalarOperator op) + throws DMLRuntimeException + { + //scan over linearized values + double[] ret = new double[_values.length]; + for (int i = 0; i < _values.length; i++) { + ret[i] = op.executeScalar(_values[i]); + } + + return ret; + } + + /** + * + * @param op + * @param newVal + * @param numCols + * @return + * @throws DMLRuntimeException + */ + protected double[] applyScalarOp(ScalarOperator op, double newVal, int numCols) + throws DMLRuntimeException + { + //scan over linearized values + double[] ret = new double[_values.length + numCols]; + for( int i = 0; i < _values.length; i++ ) { + ret[i] = op.executeScalar(_values[i]); + } + + //add new value to the end + Arrays.fill(ret, _values.length, _values.length+numCols, newVal); + + return ret; + } + + /** + * @return the number of distinct sets of values associated with the bitmaps + * in this column group + */ + public int getNumValues() { + return _values.length / _colIndexes.length; + } + + /** + * + * @return + */ + public double[] getValues() { + return _values; + } + + /** + * + * @return + */ + public char[] getBitmaps() { + return _data; + } + + public int[] getBitmapOffsets() { + return _ptr; + } + + /** + * @param bmpIx + * index of a specific compressed bitmap (stored in subclass, + * index same as {@link #values}) + * @return an object for iterating over the row offsets in this bitmap. Only + * valid until the next call to this method. May be reused across + * calls. + */ + public abstract Iterator<Integer> getDecodeIterator(int bmpIx); + + /** + * Utility function of sparse-unsafe operations. + * + * @param ind + * @return + * @throws DMLRuntimeException + */ + protected int[] computeOffsets(boolean[] ind) + throws DMLRuntimeException + { + //determine number of offsets + int numOffsets = 0; + for( int i=0; i<ind.length; i++ ) + numOffsets += ind[i] ? 1 : 0; + + //create offset lists + int[] ret = new int[numOffsets]; + for( int i=0, pos=0; i<ind.length; i++ ) + if( ind[i] ) + ret[pos++] = i; + + return ret; + } + + @Override + public void readFields(DataInput in) + throws IOException + { + _numRows = in.readInt(); + int numCols = in.readInt(); + int numVals = in.readInt(); + + //read col indices + _colIndexes = new int[ numCols ]; + for( int i=0; i<numCols; i++ ) + _colIndexes[i] = in.readInt(); + + //read distinct values + _values = new double[numVals*numCols]; + for( int i=0; i<numVals*numCols; i++ ) + _values[i] = in.readDouble(); + + //read bitmaps + int totalLen = in.readInt(); + _ptr = new int[numVals+1]; + _data = new char[totalLen]; + for( int i=0, off=0; i<numVals; i++ ) { + int len = in.readInt(); + _ptr[i] = off; + for( int j=0; j<len; j++ ) + _data[off+j] = in.readChar(); + off += len; + } + _ptr[numVals] = totalLen; + } + + @Override + public void write(DataOutput out) + throws IOException + { + int numCols = getNumCols(); + int numVals = getNumValues(); + out.writeInt(_numRows); + out.writeInt(numCols); + out.writeInt(numVals); + + //write col indices + for( int i=0; i<_colIndexes.length; i++ ) + out.writeInt( _colIndexes[i] ); + + //write distinct values + for( int i=0; i<_values.length; i++ ) + out.writeDouble(_values[i]); + + //write bitmaps (lens and data, offset later recreated) + int totalLen = 0; + for( int i=0; i<numVals; i++ ) + totalLen += len(i); + out.writeInt(totalLen); + for( int i=0; i<numVals; i++ ) { + int len = len(i); + int off = _ptr[i]; + out.writeInt(len); + for( int j=0; j<len; j++ ) + out.writeChar(_data[off+j]); + } + } + + @Override + public long getExactSizeOnDisk() { + long ret = 12; //header + //col indices + ret += 4 * _colIndexes.length; + //distinct values (groups of values) + ret += 8 * _values.length; + //actual bitmaps + ret += 4; //total length + for( int i=0; i<getNumValues(); i++ ) + ret += 4 + 2 * len(i); + + return ret; + } +}
