http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java index a76223c..2764664 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java +++ b/src/main/java/org/apache/sysml/runtime/compress/PlanningBinPacker.java @@ -1,191 +1,191 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import java.util.ArrayList; -import java.util.Arrays; -import java.util.Comparator; -import java.util.List; -import java.util.Map; -import java.util.TreeMap; - -import org.apache.commons.math3.random.RandomDataGenerator; - -/** - * Used for the finding columns to co-code - * - */ -public class PlanningBinPacker -{ - private final float _binWeight; - private final List<Integer> _items; - private final List<Float> _itemWeights; - - public PlanningBinPacker(float binWeight, List<Integer> items, List<Float> itemWeights) { - _binWeight = binWeight; - _items = items; - _itemWeights = itemWeights; - } - - /** - * NOTE: upper bound is 17/10 OPT - * - * @return key: available space, value: list of the bins that have that free space - */ - public TreeMap<Float, List<List<Integer>>> packFirstFit() { - return packFirstFit(_items, _itemWeights); - } - - /** - * shuffling the items to make some potential for having bins of different - * sizes when consecutive columns are of close cardinalities - * - * @return key: available space, value: list of the bins that have that free - * space - */ - public TreeMap<Float, List<List<Integer>>> packFirstFitShuffled() { - RandomDataGenerator rnd = new RandomDataGenerator(); - int[] permutation = rnd.nextPermutation(_items.size(), _items.size()); - List<Integer> shuffledItems = new ArrayList<Integer>(_items.size()); - List<Float> shuffledWeights = new ArrayList<Float>(_items.size()); - for (int ix : permutation) { - shuffledItems.add(_items.get(ix)); - shuffledWeights.add(_itemWeights.get(ix)); - } - - return packFirstFit(shuffledItems, shuffledWeights); - } - - /** - * - * @param items - * @param itemWeights - * @return - */ - private TreeMap<Float, List<List<Integer>>> packFirstFit(List<Integer> items, List<Float> itemWeights) - { - // when searching for a bin, the first bin in the list is used - TreeMap<Float, List<List<Integer>>> bins = new TreeMap<Float, List<List<Integer>>>(); - // first bin - bins.put(_binWeight, createBinList()); - int numItems = items.size(); - for (int i = 0; i < numItems; i++) { - float itemWeight = itemWeights.get(i); - Map.Entry<Float, List<List<Integer>>> entry = bins - .ceilingEntry(itemWeight); - if (entry == null) { - // new bin - float newBinWeight = _binWeight - itemWeight; - List<List<Integer>> binList = bins.get(newBinWeight); - if (binList == null) { - bins.put(newBinWeight, createBinList(items.get(i))); - } else { - List<Integer> newBin = new ArrayList<Integer>(); - newBin.add(items.get(i)); - binList.add(newBin); - } - } else { - // add to the first bin in the list - List<Integer> assignedBin = entry.getValue().remove(0); - assignedBin.add(items.get(i)); - if (entry.getValue().size() == 0) - bins.remove(entry.getKey()); - float newBinWeight = entry.getKey() - itemWeight; - List<List<Integer>> newBinsList = bins.get(newBinWeight); - if (newBinsList == null) { - // new bin - bins.put(newBinWeight, createBinList(assignedBin)); - } else { - newBinsList.add(assignedBin); - } - } - } - return bins; - } - - /** - * NOTE: upper bound is 11/9 OPT + 6/9 (~1.22 OPT) - * - * @return - */ - public TreeMap<Float, List<List<Integer>>> packFirstFitDescending() { - // sort items descending based on their weights - Integer[] indexes = new Integer[_items.size()]; - for (int i = 0; i < indexes.length; i++) - indexes[i] = i; - Arrays.sort(indexes, new Comparator<Integer>() { - - @Override - public int compare(Integer o1, Integer o2) { - return _itemWeights.get(o1).compareTo(_itemWeights.get(o2)); - } - }); - List<Integer> sortedItems = new ArrayList<Integer>(); - List<Float> sortedItemWeights = new ArrayList<Float>(); - for (int i = indexes.length - 1; i >= 0; i--) { - sortedItems.add(_items.get(i)); - sortedItemWeights.add(_itemWeights.get(i)); - } - return packFirstFit(sortedItems, sortedItemWeights); - } - - /** - * NOTE: upper bound is 71/60 OPT + 6/9 (~1.18 OPT) - * - * @return - */ - public TreeMap<Float, List<List<Integer>>> packModifiedFirstFitDescending() { - throw new UnsupportedOperationException("Not implemented yet!"); - } - - /** - * - * @return - */ - private List<List<Integer>> createBinList() { - List<List<Integer>> binList = new ArrayList<List<Integer>>(); - binList.add(new ArrayList<Integer>()); - return binList; - } - - /** - * - * @param item - * @return - */ - private List<List<Integer>> createBinList(int item) { - List<List<Integer>> binList = new ArrayList<List<Integer>>(); - List<Integer> bin = new ArrayList<Integer>(); - binList.add(bin); - bin.add(item); - return binList; - } - - /** - * - * @param bin - * @return - */ - private List<List<Integer>> createBinList(List<Integer> bin) { - List<List<Integer>> binList = new ArrayList<List<Integer>>(); - binList.add(bin); - return binList; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.ArrayList; +import java.util.Arrays; +import java.util.Comparator; +import java.util.List; +import java.util.Map; +import java.util.TreeMap; + +import org.apache.commons.math3.random.RandomDataGenerator; + +/** + * Used for the finding columns to co-code + * + */ +public class PlanningBinPacker +{ + private final float _binWeight; + private final List<Integer> _items; + private final List<Float> _itemWeights; + + public PlanningBinPacker(float binWeight, List<Integer> items, List<Float> itemWeights) { + _binWeight = binWeight; + _items = items; + _itemWeights = itemWeights; + } + + /** + * NOTE: upper bound is 17/10 OPT + * + * @return key: available space, value: list of the bins that have that free space + */ + public TreeMap<Float, List<List<Integer>>> packFirstFit() { + return packFirstFit(_items, _itemWeights); + } + + /** + * shuffling the items to make some potential for having bins of different + * sizes when consecutive columns are of close cardinalities + * + * @return key: available space, value: list of the bins that have that free + * space + */ + public TreeMap<Float, List<List<Integer>>> packFirstFitShuffled() { + RandomDataGenerator rnd = new RandomDataGenerator(); + int[] permutation = rnd.nextPermutation(_items.size(), _items.size()); + List<Integer> shuffledItems = new ArrayList<Integer>(_items.size()); + List<Float> shuffledWeights = new ArrayList<Float>(_items.size()); + for (int ix : permutation) { + shuffledItems.add(_items.get(ix)); + shuffledWeights.add(_itemWeights.get(ix)); + } + + return packFirstFit(shuffledItems, shuffledWeights); + } + + /** + * + * @param items + * @param itemWeights + * @return + */ + private TreeMap<Float, List<List<Integer>>> packFirstFit(List<Integer> items, List<Float> itemWeights) + { + // when searching for a bin, the first bin in the list is used + TreeMap<Float, List<List<Integer>>> bins = new TreeMap<Float, List<List<Integer>>>(); + // first bin + bins.put(_binWeight, createBinList()); + int numItems = items.size(); + for (int i = 0; i < numItems; i++) { + float itemWeight = itemWeights.get(i); + Map.Entry<Float, List<List<Integer>>> entry = bins + .ceilingEntry(itemWeight); + if (entry == null) { + // new bin + float newBinWeight = _binWeight - itemWeight; + List<List<Integer>> binList = bins.get(newBinWeight); + if (binList == null) { + bins.put(newBinWeight, createBinList(items.get(i))); + } else { + List<Integer> newBin = new ArrayList<Integer>(); + newBin.add(items.get(i)); + binList.add(newBin); + } + } else { + // add to the first bin in the list + List<Integer> assignedBin = entry.getValue().remove(0); + assignedBin.add(items.get(i)); + if (entry.getValue().size() == 0) + bins.remove(entry.getKey()); + float newBinWeight = entry.getKey() - itemWeight; + List<List<Integer>> newBinsList = bins.get(newBinWeight); + if (newBinsList == null) { + // new bin + bins.put(newBinWeight, createBinList(assignedBin)); + } else { + newBinsList.add(assignedBin); + } + } + } + return bins; + } + + /** + * NOTE: upper bound is 11/9 OPT + 6/9 (~1.22 OPT) + * + * @return + */ + public TreeMap<Float, List<List<Integer>>> packFirstFitDescending() { + // sort items descending based on their weights + Integer[] indexes = new Integer[_items.size()]; + for (int i = 0; i < indexes.length; i++) + indexes[i] = i; + Arrays.sort(indexes, new Comparator<Integer>() { + + @Override + public int compare(Integer o1, Integer o2) { + return _itemWeights.get(o1).compareTo(_itemWeights.get(o2)); + } + }); + List<Integer> sortedItems = new ArrayList<Integer>(); + List<Float> sortedItemWeights = new ArrayList<Float>(); + for (int i = indexes.length - 1; i >= 0; i--) { + sortedItems.add(_items.get(i)); + sortedItemWeights.add(_itemWeights.get(i)); + } + return packFirstFit(sortedItems, sortedItemWeights); + } + + /** + * NOTE: upper bound is 71/60 OPT + 6/9 (~1.18 OPT) + * + * @return + */ + public TreeMap<Float, List<List<Integer>>> packModifiedFirstFitDescending() { + throw new UnsupportedOperationException("Not implemented yet!"); + } + + /** + * + * @return + */ + private List<List<Integer>> createBinList() { + List<List<Integer>> binList = new ArrayList<List<Integer>>(); + binList.add(new ArrayList<Integer>()); + return binList; + } + + /** + * + * @param item + * @return + */ + private List<List<Integer>> createBinList(int item) { + List<List<Integer>> binList = new ArrayList<List<Integer>>(); + List<Integer> bin = new ArrayList<Integer>(); + binList.add(bin); + bin.add(item); + return binList; + } + + /** + * + * @param bin + * @return + */ + private List<List<Integer>> createBinList(List<Integer> bin) { + List<List<Integer>> binList = new ArrayList<List<Integer>>(); + binList.add(bin); + return binList; + } +}
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java index 221e4ca..6553c20 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java +++ b/src/main/java/org/apache/sysml/runtime/compress/PlanningCoCodingGroup.java @@ -1,104 +1,104 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import java.util.Arrays; - -import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator; -import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo; - -/** - * Class to represent information about co-coding a group of columns. - * - */ -public class PlanningCoCodingGroup -{ - private int[] _colIndexes; - private long _estSize; - private float _cardRatio; - - /** - * Constructor for a one-column group; i.e. do not co-code a given column. - * - */ - public PlanningCoCodingGroup(int col, long estSize, float cardRatio) { - _colIndexes = new int[]{col}; - _estSize = estSize; - _cardRatio = cardRatio; - } - - /** - * Constructor for merging two disjoint groups of columns - * - * @param grp1 first group of columns to merge - * @param grp2 second group to merge - * @param numRowsWeight numRows x sparsity - */ - public PlanningCoCodingGroup(PlanningCoCodingGroup grp1, PlanningCoCodingGroup grp2, - CompressedSizeEstimator bitmapSizeEstimator, float numRowsWeight) - { - // merge sorted non-empty arrays - _colIndexes = new int[grp1._colIndexes.length + grp2._colIndexes.length]; - int grp1Ptr = 0, grp2Ptr = 0; - for (int mergedIx = 0; mergedIx < _colIndexes.length; mergedIx++) { - if (grp1._colIndexes[grp1Ptr] < grp2._colIndexes[grp2Ptr]) { - _colIndexes[mergedIx] = grp1._colIndexes[grp1Ptr++]; - if (grp1Ptr == grp1._colIndexes.length) { - System.arraycopy(grp2._colIndexes, grp2Ptr, _colIndexes, - mergedIx + 1, grp2._colIndexes.length - grp2Ptr); - break; - } - } else { - _colIndexes[mergedIx] = grp2._colIndexes[grp2Ptr++]; - if (grp2Ptr == grp2._colIndexes.length) { - System.arraycopy(grp1._colIndexes, grp1Ptr, _colIndexes, - mergedIx + 1, grp1._colIndexes.length - grp1Ptr); - break; - } - } - } - - // estimating size info - CompressedSizeInfo groupSizeInfo = bitmapSizeEstimator - .estimateCompressedColGroupSize(_colIndexes); - _estSize = groupSizeInfo.getMinSize(); - _cardRatio = groupSizeInfo.getEstCarinality() / numRowsWeight; - } - - public int[] getColIndices() { - return _colIndexes; - } - - /** - * @return estimated compressed size of the grouped columns - */ - public long getEstSize() { - return _estSize; - } - - public float getCardinalityRatio() { - return _cardRatio; - } - - @Override - public String toString() { - return Arrays.toString(_colIndexes); - } +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.Arrays; + +import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator; +import org.apache.sysml.runtime.compress.estim.CompressedSizeInfo; + +/** + * Class to represent information about co-coding a group of columns. + * + */ +public class PlanningCoCodingGroup +{ + private int[] _colIndexes; + private long _estSize; + private float _cardRatio; + + /** + * Constructor for a one-column group; i.e. do not co-code a given column. + * + */ + public PlanningCoCodingGroup(int col, long estSize, float cardRatio) { + _colIndexes = new int[]{col}; + _estSize = estSize; + _cardRatio = cardRatio; + } + + /** + * Constructor for merging two disjoint groups of columns + * + * @param grp1 first group of columns to merge + * @param grp2 second group to merge + * @param numRowsWeight numRows x sparsity + */ + public PlanningCoCodingGroup(PlanningCoCodingGroup grp1, PlanningCoCodingGroup grp2, + CompressedSizeEstimator bitmapSizeEstimator, float numRowsWeight) + { + // merge sorted non-empty arrays + _colIndexes = new int[grp1._colIndexes.length + grp2._colIndexes.length]; + int grp1Ptr = 0, grp2Ptr = 0; + for (int mergedIx = 0; mergedIx < _colIndexes.length; mergedIx++) { + if (grp1._colIndexes[grp1Ptr] < grp2._colIndexes[grp2Ptr]) { + _colIndexes[mergedIx] = grp1._colIndexes[grp1Ptr++]; + if (grp1Ptr == grp1._colIndexes.length) { + System.arraycopy(grp2._colIndexes, grp2Ptr, _colIndexes, + mergedIx + 1, grp2._colIndexes.length - grp2Ptr); + break; + } + } else { + _colIndexes[mergedIx] = grp2._colIndexes[grp2Ptr++]; + if (grp2Ptr == grp2._colIndexes.length) { + System.arraycopy(grp1._colIndexes, grp1Ptr, _colIndexes, + mergedIx + 1, grp1._colIndexes.length - grp1Ptr); + break; + } + } + } + + // estimating size info + CompressedSizeInfo groupSizeInfo = bitmapSizeEstimator + .estimateCompressedColGroupSize(_colIndexes); + _estSize = groupSizeInfo.getMinSize(); + _cardRatio = groupSizeInfo.getEstCarinality() / numRowsWeight; + } + + public int[] getColIndices() { + return _colIndexes; + } + + /** + * @return estimated compressed size of the grouped columns + */ + public long getEstSize() { + return _estSize; + } + + public float getCardinalityRatio() { + return _cardRatio; + } + + @Override + public String toString() { + return Arrays.toString(_colIndexes); + } } \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java b/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java index 5e3c6c5..47d46d5 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java +++ b/src/main/java/org/apache/sysml/runtime/compress/PlanningGroupMergeAction.java @@ -1,73 +1,73 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator; - -/** - * Internal data structure for tracking potential merges of column groups in - * co-coding calculations. - * - */ -class PlanningGroupMergeAction implements Comparable<PlanningGroupMergeAction> -{ - private PlanningCoCodingGroup _leftGrp; //left input - private PlanningCoCodingGroup _rightGrp; //right input - private PlanningCoCodingGroup _mergedGrp; //output - private long _changeInSize; - - - public PlanningGroupMergeAction(CompressedSizeEstimator sizeEstimator, - float numRowsWeight, PlanningCoCodingGroup leftGrp, PlanningCoCodingGroup rightGrp) { - _leftGrp = leftGrp; - _rightGrp = rightGrp; - _mergedGrp = new PlanningCoCodingGroup(leftGrp, rightGrp, sizeEstimator, numRowsWeight); - - // Negative size change ==> Decrease in size - _changeInSize = _mergedGrp.getEstSize() - - leftGrp.getEstSize() - rightGrp.getEstSize(); - } - - public int compareTo(PlanningGroupMergeAction o) { - // We only sort by the change in size - return (int) Math.signum(_changeInSize - o._changeInSize); - } - - @Override - public String toString() { - return String.format("Merge %s and %s", _leftGrp, _rightGrp); - } - - public PlanningCoCodingGroup getLeftGrp() { - return _leftGrp; - } - - public PlanningCoCodingGroup getRightGrp() { - return _rightGrp; - } - - public PlanningCoCodingGroup getMergedGrp() { - return _mergedGrp; - } - - public long getChangeInSize() { - return _changeInSize; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import org.apache.sysml.runtime.compress.estim.CompressedSizeEstimator; + +/** + * Internal data structure for tracking potential merges of column groups in + * co-coding calculations. + * + */ +class PlanningGroupMergeAction implements Comparable<PlanningGroupMergeAction> +{ + private PlanningCoCodingGroup _leftGrp; //left input + private PlanningCoCodingGroup _rightGrp; //right input + private PlanningCoCodingGroup _mergedGrp; //output + private long _changeInSize; + + + public PlanningGroupMergeAction(CompressedSizeEstimator sizeEstimator, + float numRowsWeight, PlanningCoCodingGroup leftGrp, PlanningCoCodingGroup rightGrp) { + _leftGrp = leftGrp; + _rightGrp = rightGrp; + _mergedGrp = new PlanningCoCodingGroup(leftGrp, rightGrp, sizeEstimator, numRowsWeight); + + // Negative size change ==> Decrease in size + _changeInSize = _mergedGrp.getEstSize() + - leftGrp.getEstSize() - rightGrp.getEstSize(); + } + + public int compareTo(PlanningGroupMergeAction o) { + // We only sort by the change in size + return (int) Math.signum(_changeInSize - o._changeInSize); + } + + @Override + public String toString() { + return String.format("Merge %s and %s", _leftGrp, _rightGrp); + } + + public PlanningCoCodingGroup getLeftGrp() { + return _leftGrp; + } + + public PlanningCoCodingGroup getRightGrp() { + return _rightGrp; + } + + public PlanningCoCodingGroup getMergedGrp() { + return _mergedGrp; + } + + public long getChangeInSize() { + return _changeInSize; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java index a37018f..bd3c13c 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelection.java @@ -1,64 +1,64 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import org.apache.sysml.runtime.compress.utils.DblArray; - -/** - * Base class for all column selection readers. - * - */ -public abstract class ReaderColumnSelection -{ - protected int[] _colIndexes = null; - protected int _numRows = -1; - protected int _lastRow = -1; - protected boolean _skipZeros = false; - - protected ReaderColumnSelection(int[] colIndexes, int numRows, boolean skipZeros) { - _colIndexes = colIndexes; - _numRows = numRows; - _lastRow = -1; - _skipZeros = skipZeros; - } - - /** - * Gets the next row, null when no more rows. - * - * @return - */ - public abstract DblArray nextRow(); - - /** - * - * @return - */ - public int getCurrentRowIndex() { - return _lastRow; - } - - - /** - * Resets the reader to the first row. - */ - public void reset() { - _lastRow = -1; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import org.apache.sysml.runtime.compress.utils.DblArray; + +/** + * Base class for all column selection readers. + * + */ +public abstract class ReaderColumnSelection +{ + protected int[] _colIndexes = null; + protected int _numRows = -1; + protected int _lastRow = -1; + protected boolean _skipZeros = false; + + protected ReaderColumnSelection(int[] colIndexes, int numRows, boolean skipZeros) { + _colIndexes = colIndexes; + _numRows = numRows; + _lastRow = -1; + _skipZeros = skipZeros; + } + + /** + * Gets the next row, null when no more rows. + * + * @return + */ + public abstract DblArray nextRow(); + + /** + * + * @return + */ + public int getCurrentRowIndex() { + return _lastRow; + } + + + /** + * Resets the reader to the first row. + */ + public void reset() { + _lastRow = -1; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java index d22f39d..c39eeef 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDense.java @@ -1,68 +1,68 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import org.apache.sysml.runtime.compress.utils.DblArray; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; - -public class ReaderColumnSelectionDense extends ReaderColumnSelection -{ - protected MatrixBlock _data; - - // reusable return - private DblArray nonZeroReturn; - private DblArray reusableReturn; - private double[] reusableArr; - - public ReaderColumnSelectionDense(MatrixBlock data, int[] colIndices, boolean skipZeros) { - super(colIndices, CompressedMatrixBlock.TRANSPOSE_INPUT ? - data.getNumColumns() : data.getNumRows(), skipZeros); - _data = data; - reusableArr = new double[colIndices.length]; - reusableReturn = new DblArray(reusableArr); - } - - @Override - public DblArray nextRow() { - if( _skipZeros) { - while ((nonZeroReturn = getNextRow()) != null - && DblArray.isZero(nonZeroReturn)); - return nonZeroReturn; - } else { - return getNextRow(); - } - } - - /** - * - * @return - */ - private DblArray getNextRow() { - if(_lastRow == _numRows-1) - return null; - _lastRow++; - for (int i = 0; i < _colIndexes.length; i++) { - reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ? - _data.quickGetValue( _colIndexes[i], _lastRow ) : - _data.quickGetValue( _lastRow, _colIndexes[i] ); - } - return reusableReturn; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import org.apache.sysml.runtime.compress.utils.DblArray; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; + +public class ReaderColumnSelectionDense extends ReaderColumnSelection +{ + protected MatrixBlock _data; + + // reusable return + private DblArray nonZeroReturn; + private DblArray reusableReturn; + private double[] reusableArr; + + public ReaderColumnSelectionDense(MatrixBlock data, int[] colIndices, boolean skipZeros) { + super(colIndices, CompressedMatrixBlock.TRANSPOSE_INPUT ? + data.getNumColumns() : data.getNumRows(), skipZeros); + _data = data; + reusableArr = new double[colIndices.length]; + reusableReturn = new DblArray(reusableArr); + } + + @Override + public DblArray nextRow() { + if( _skipZeros) { + while ((nonZeroReturn = getNextRow()) != null + && DblArray.isZero(nonZeroReturn)); + return nonZeroReturn; + } else { + return getNextRow(); + } + } + + /** + * + * @return + */ + private DblArray getNextRow() { + if(_lastRow == _numRows-1) + return null; + _lastRow++; + for (int i = 0; i < _colIndexes.length; i++) { + reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ? + _data.quickGetValue( _colIndexes[i], _lastRow ) : + _data.quickGetValue( _lastRow, _colIndexes[i] ); + } + return reusableReturn; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java index 06518e4..d8e50c0 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionDenseSample.java @@ -1,86 +1,86 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import org.apache.sysml.runtime.compress.utils.DblArray; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; - -/** - * - * considers only a subset of row indexes - */ -public class ReaderColumnSelectionDenseSample extends ReaderColumnSelection -{ - protected MatrixBlock _data; - - private int[] _sampleIndexes; - private int lastIndex = -1; - - // reusable return - private DblArray nonZeroReturn; - private DblArray reusableReturn; - private double[] reusableArr; - - public ReaderColumnSelectionDenseSample(MatrixBlock data, int[] colIndexes, int[] sampleIndexes, boolean skipZeros) - { - super(colIndexes, -1, skipZeros); - _data = data; - _sampleIndexes = sampleIndexes; - reusableArr = new double[colIndexes.length]; - reusableReturn = new DblArray(reusableArr); - } - - @Override - public DblArray nextRow() { - if (_skipZeros) { - while ((nonZeroReturn = getNextRow()) != null - && DblArray.isZero(nonZeroReturn)); - return nonZeroReturn; - } else { - return getNextRow(); - } - } - - /** - * - * @return - */ - private DblArray getNextRow() { - if (lastIndex == _sampleIndexes.length - 1) - return null; - lastIndex++; - for (int i = 0; i < _colIndexes.length; i++) { - reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ? - _data.quickGetValue(_colIndexes[i], _sampleIndexes[lastIndex]) : - _data.quickGetValue(_sampleIndexes[lastIndex], _colIndexes[i]); - } - return reusableReturn; - } - - @Override - public int getCurrentRowIndex() { - return _sampleIndexes[lastIndex]; - } - - @Override - public void reset() { - lastIndex = -1; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import org.apache.sysml.runtime.compress.utils.DblArray; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; + +/** + * + * considers only a subset of row indexes + */ +public class ReaderColumnSelectionDenseSample extends ReaderColumnSelection +{ + protected MatrixBlock _data; + + private int[] _sampleIndexes; + private int lastIndex = -1; + + // reusable return + private DblArray nonZeroReturn; + private DblArray reusableReturn; + private double[] reusableArr; + + public ReaderColumnSelectionDenseSample(MatrixBlock data, int[] colIndexes, int[] sampleIndexes, boolean skipZeros) + { + super(colIndexes, -1, skipZeros); + _data = data; + _sampleIndexes = sampleIndexes; + reusableArr = new double[colIndexes.length]; + reusableReturn = new DblArray(reusableArr); + } + + @Override + public DblArray nextRow() { + if (_skipZeros) { + while ((nonZeroReturn = getNextRow()) != null + && DblArray.isZero(nonZeroReturn)); + return nonZeroReturn; + } else { + return getNextRow(); + } + } + + /** + * + * @return + */ + private DblArray getNextRow() { + if (lastIndex == _sampleIndexes.length - 1) + return null; + lastIndex++; + for (int i = 0; i < _colIndexes.length; i++) { + reusableArr[i] = CompressedMatrixBlock.TRANSPOSE_INPUT ? + _data.quickGetValue(_colIndexes[i], _sampleIndexes[lastIndex]) : + _data.quickGetValue(_sampleIndexes[lastIndex], _colIndexes[i]); + } + return reusableReturn; + } + + @Override + public int getCurrentRowIndex() { + return _sampleIndexes[lastIndex]; + } + + @Override + public void reset() { + lastIndex = -1; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java index d2ef5a4..c6adaee 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java +++ b/src/main/java/org/apache/sysml/runtime/compress/ReaderColumnSelectionSparse.java @@ -1,115 +1,115 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import java.util.Arrays; - -import org.apache.sysml.runtime.compress.utils.DblArray; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; -import org.apache.sysml.runtime.matrix.data.SparseRow; - -/** - * Used to extract the values at certain indexes from each row in a sparse - * matrix - * - * Keeps returning all-zeros arrays until reaching the last possible index. The - * current compression algorithm treats the zero-value in a sparse matrix like - * any other value. - */ -public class ReaderColumnSelectionSparse extends ReaderColumnSelection -{ - private final DblArray ZERO_DBL_ARRAY; - private DblArray nonZeroReturn; - - // reusable return - private DblArray reusableReturn; - private double[] reusableArr; - - // current sparse row positions - private SparseRow[] sparseCols = null; - private int[] sparsePos = null; - - public ReaderColumnSelectionSparse(MatrixBlock data, int[] colIndexes, boolean skipZeros) - { - super(colIndexes, CompressedMatrixBlock.TRANSPOSE_INPUT ? - data.getNumColumns() : data.getNumRows(), skipZeros); - ZERO_DBL_ARRAY = new DblArray(new double[colIndexes.length], true); - reusableArr = new double[colIndexes.length]; - reusableReturn = new DblArray(reusableArr); - - if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){ - throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input."); - } - - sparseCols = new SparseRow[colIndexes.length]; - sparsePos = new int[colIndexes.length]; - if( data.getSparseBlock()!=null ) - for( int i=0; i<colIndexes.length; i++ ) - sparseCols[i] = data.getSparseBlock().get(colIndexes[i]); - Arrays.fill(sparsePos, 0); - } - - @Override - public DblArray nextRow() { - if(_skipZeros) { - while ((nonZeroReturn = getNextRow()) != null - && nonZeroReturn == ZERO_DBL_ARRAY); - return nonZeroReturn; - } else { - return getNextRow(); - } - } - - /** - * - * @return - */ - private DblArray getNextRow() - { - if(_lastRow == _numRows-1) - return null; - _lastRow++; - - if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){ - throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input."); - } - - //move pos to current row if necessary (for all columns) - for( int i=0; i<_colIndexes.length; i++ ) - if( sparseCols[i] != null && (sparseCols[i].indexes().length<=sparsePos[i] - || sparseCols[i].indexes()[sparsePos[i]]<_lastRow) ) - { - sparsePos[i]++; - } - - //extract current values - Arrays.fill(reusableArr, 0); - boolean zeroResult = true; - for( int i=0; i<_colIndexes.length; i++ ) - if( sparseCols[i] != null && sparseCols[i].indexes().length>sparsePos[i] - &&sparseCols[i].indexes()[sparsePos[i]]==_lastRow ) - { - reusableArr[i] = sparseCols[i].values()[sparsePos[i]]; - zeroResult = false; - } - - return zeroResult ? ZERO_DBL_ARRAY : reusableReturn; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.Arrays; + +import org.apache.sysml.runtime.compress.utils.DblArray; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; +import org.apache.sysml.runtime.matrix.data.SparseRow; + +/** + * Used to extract the values at certain indexes from each row in a sparse + * matrix + * + * Keeps returning all-zeros arrays until reaching the last possible index. The + * current compression algorithm treats the zero-value in a sparse matrix like + * any other value. + */ +public class ReaderColumnSelectionSparse extends ReaderColumnSelection +{ + private final DblArray ZERO_DBL_ARRAY; + private DblArray nonZeroReturn; + + // reusable return + private DblArray reusableReturn; + private double[] reusableArr; + + // current sparse row positions + private SparseRow[] sparseCols = null; + private int[] sparsePos = null; + + public ReaderColumnSelectionSparse(MatrixBlock data, int[] colIndexes, boolean skipZeros) + { + super(colIndexes, CompressedMatrixBlock.TRANSPOSE_INPUT ? + data.getNumColumns() : data.getNumRows(), skipZeros); + ZERO_DBL_ARRAY = new DblArray(new double[colIndexes.length], true); + reusableArr = new double[colIndexes.length]; + reusableReturn = new DblArray(reusableArr); + + if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){ + throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input."); + } + + sparseCols = new SparseRow[colIndexes.length]; + sparsePos = new int[colIndexes.length]; + if( data.getSparseBlock()!=null ) + for( int i=0; i<colIndexes.length; i++ ) + sparseCols[i] = data.getSparseBlock().get(colIndexes[i]); + Arrays.fill(sparsePos, 0); + } + + @Override + public DblArray nextRow() { + if(_skipZeros) { + while ((nonZeroReturn = getNextRow()) != null + && nonZeroReturn == ZERO_DBL_ARRAY); + return nonZeroReturn; + } else { + return getNextRow(); + } + } + + /** + * + * @return + */ + private DblArray getNextRow() + { + if(_lastRow == _numRows-1) + return null; + _lastRow++; + + if( !CompressedMatrixBlock.TRANSPOSE_INPUT ){ + throw new RuntimeException("SparseColumnSelectionReader should not be used without transposed input."); + } + + //move pos to current row if necessary (for all columns) + for( int i=0; i<_colIndexes.length; i++ ) + if( sparseCols[i] != null && (sparseCols[i].indexes().length<=sparsePos[i] + || sparseCols[i].indexes()[sparsePos[i]]<_lastRow) ) + { + sparsePos[i]++; + } + + //extract current values + Arrays.fill(reusableArr, 0); + boolean zeroResult = true; + for( int i=0; i<_colIndexes.length; i++ ) + if( sparseCols[i] != null && sparseCols[i].indexes().length>sparsePos[i] + &&sparseCols[i].indexes()[sparsePos[i]]==_lastRow ) + { + reusableArr[i] = sparseCols[i].values()[sparsePos[i]]; + zeroResult = false; + } + + return zeroResult ? ZERO_DBL_ARRAY : reusableReturn; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java b/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java index 971f438..84a2c6f 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java +++ b/src/main/java/org/apache/sysml/runtime/compress/UncompressedBitmap.java @@ -1,101 +1,101 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress; - -import java.util.Arrays; - -import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap; -import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap; -import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap.DArrayIListEntry; -import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap.DIListEntry; - -/** - * Uncompressed representation of one or more columns in bitmap format. - * - */ -public final class UncompressedBitmap -{ - private int _numCols; - - /** Distinct values that appear in the column. Linearized as value groups <v11 v12> <v21 v22>.*/ - private double[] _values; - - /** Bitmaps (as lists of offsets) for each of the values. */ - private int[][] _offsetsLists; - - public UncompressedBitmap( DblArrayIntListHashMap distinctVals, int numColumns ) - { - // added for one pass bitmap construction - // Convert inputs to arrays - int numVals = distinctVals.size(); - _values = new double[numVals*numColumns]; - _offsetsLists = new int[numVals][]; - int bitmapIx = 0; - for( DArrayIListEntry val : distinctVals.extractValues()) { - System.arraycopy(val.key.getData(), 0, _values, bitmapIx*numColumns, numColumns); - _offsetsLists[bitmapIx++] = val.value.extractValues(); - } - _numCols = numColumns; - } - - public UncompressedBitmap( DoubleIntListHashMap distinctVals ) - { - // added for one pass bitmap construction - // Convert inputs to arrays - int numVals = distinctVals.size(); - _values = new double[numVals]; - _offsetsLists = new int[numVals][]; - int bitmapIx = 0; - for(DIListEntry val : distinctVals.extractValues()) { - _values[bitmapIx] = val.key; - _offsetsLists[bitmapIx++] = val.value.extractValues(); - } - _numCols = 1; - } - - public int getNumColumns() { - return _numCols; - } - - /** - * @param ix index of a particular distinct value - * @return the tuple of column values associated with the specified index - */ - public double[] getValues(int ix) { - return Arrays.copyOfRange(_values, ix*_numCols, (ix+1)*_numCols); - } - - /** - * @return number of distinct values in the column; this number is also the - * number of bitmaps, since there is one bitmap per value - */ - public int getNumValues() { - return _values.length / _numCols; - } - - /** - * @param ix index of a particular distinct value - * @return IMMUTABLE array of the offsets of the rows containing the value - * with the indicated index - */ - public int[] getOffsetsList(int ix) { - return _offsetsLists[ix]; - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress; + +import java.util.Arrays; + +import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap; +import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap; +import org.apache.sysml.runtime.compress.utils.DblArrayIntListHashMap.DArrayIListEntry; +import org.apache.sysml.runtime.compress.utils.DoubleIntListHashMap.DIListEntry; + +/** + * Uncompressed representation of one or more columns in bitmap format. + * + */ +public final class UncompressedBitmap +{ + private int _numCols; + + /** Distinct values that appear in the column. Linearized as value groups <v11 v12> <v21 v22>.*/ + private double[] _values; + + /** Bitmaps (as lists of offsets) for each of the values. */ + private int[][] _offsetsLists; + + public UncompressedBitmap( DblArrayIntListHashMap distinctVals, int numColumns ) + { + // added for one pass bitmap construction + // Convert inputs to arrays + int numVals = distinctVals.size(); + _values = new double[numVals*numColumns]; + _offsetsLists = new int[numVals][]; + int bitmapIx = 0; + for( DArrayIListEntry val : distinctVals.extractValues()) { + System.arraycopy(val.key.getData(), 0, _values, bitmapIx*numColumns, numColumns); + _offsetsLists[bitmapIx++] = val.value.extractValues(); + } + _numCols = numColumns; + } + + public UncompressedBitmap( DoubleIntListHashMap distinctVals ) + { + // added for one pass bitmap construction + // Convert inputs to arrays + int numVals = distinctVals.size(); + _values = new double[numVals]; + _offsetsLists = new int[numVals][]; + int bitmapIx = 0; + for(DIListEntry val : distinctVals.extractValues()) { + _values[bitmapIx] = val.key; + _offsetsLists[bitmapIx++] = val.value.extractValues(); + } + _numCols = 1; + } + + public int getNumColumns() { + return _numCols; + } + + /** + * @param ix index of a particular distinct value + * @return the tuple of column values associated with the specified index + */ + public double[] getValues(int ix) { + return Arrays.copyOfRange(_values, ix*_numCols, (ix+1)*_numCols); + } + + /** + * @return number of distinct values in the column; this number is also the + * number of bitmaps, since there is one bitmap per value + */ + public int getNumValues() { + return _values.length / _numCols; + } + + /** + * @param ix index of a particular distinct value + * @return IMMUTABLE array of the offsets of the rows containing the value + * with the indicated index + */ + public int[] getOffsetsList(int ix) { + return _offsetsLists[ix]; + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java index 1a1ae55..8d0a40b 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java +++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimator.java @@ -1,145 +1,145 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress.estim; - -import org.apache.sysml.runtime.compress.BitmapEncoder; -import org.apache.sysml.runtime.compress.UncompressedBitmap; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; - -/** - * Base class for all compressed size estimators - */ -public abstract class CompressedSizeEstimator -{ - protected MatrixBlock _data; - - public CompressedSizeEstimator(MatrixBlock data) { - _data = data; - } - - /** - * - * @param colIndexes - * @return - */ - public abstract CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes); - - /** - * - * @param ubm - * @return - */ - public abstract CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm); - - /** - * - * @param ubm - * @param inclRLE - * @return - */ - protected SizeEstimationFactors computeSizeEstimationFactors(UncompressedBitmap ubm, boolean inclRLE) { - int numVals = ubm.getNumValues(); - int numRuns = 0; - int numOffs = 0; - int numSegs = 0; - int numSingle = 0; - - //compute size estimation factors - for (int i = 0; i < numVals; i++) { - int[] list = ubm.getOffsetsList(i); - numOffs += list.length; - numSegs += list[list.length - 1] / BitmapEncoder.BITMAP_BLOCK_SZ + 1; - numSingle += (list.length==1) ? 1 : 0; - if( inclRLE ) { - int lastOff = -2; - for (int j = 0; j < list.length; j++) { - if (list[j] != lastOff + 1) - numRuns++; - lastOff = list[j]; - } - } - } - - //construct estimation factors - return new SizeEstimationFactors(numVals, numSegs, numOffs, numRuns, numSingle); - } - - /** - * Estimates the number of bytes needed to encode this column group - * in RLE encoding format. - * - * @param numVals - * @param numRuns - * @param numCols - * @return - */ - protected static long getRLESize(int numVals, int numRuns, int numCols) { - int ret = 0; - //distinct value tuples [double per col] - ret += 8 * numVals * numCols; - //offset/len fields per distinct value tuple [2xint] - ret += 8 * numVals; - //run data [2xchar] - ret += 4 * numRuns; - return ret; - } - - /** - * Estimates the number of bytes needed to encode this column group - * in OLE format. - * - * @param numVals - * @param numOffs - * @param numSeqs - * @param numCols - * @return - */ - protected static long getOLESize(int numVals, float numOffs, int numSeqs, int numCols) { - int ret = 0; - //distinct value tuples [double per col] - ret += 8 * numVals * numCols; - //offset/len fields per distinct value tuple [2xint] - ret += 8 * numVals; - //offset list data [1xchar] - ret += 2 * numOffs; - //offset list seqment headers [1xchar] - ret += 2 * numSeqs; - return ret; - } - - /** - * - */ - protected static class SizeEstimationFactors { - protected int numVals; //num value tuples - protected int numSegs; //num OLE segments - protected int numOffs; //num OLE offsets - protected int numRuns; //num RLE runs - protected int numSingle; //num singletons - - protected SizeEstimationFactors(int numvals, int numsegs, int numoffs, int numruns, int numsingle) { - numVals = numvals; - numSegs = numsegs; - numOffs = numoffs; - numRuns = numruns; - numSingle = numsingle; - } - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress.estim; + +import org.apache.sysml.runtime.compress.BitmapEncoder; +import org.apache.sysml.runtime.compress.UncompressedBitmap; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; + +/** + * Base class for all compressed size estimators + */ +public abstract class CompressedSizeEstimator +{ + protected MatrixBlock _data; + + public CompressedSizeEstimator(MatrixBlock data) { + _data = data; + } + + /** + * + * @param colIndexes + * @return + */ + public abstract CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes); + + /** + * + * @param ubm + * @return + */ + public abstract CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm); + + /** + * + * @param ubm + * @param inclRLE + * @return + */ + protected SizeEstimationFactors computeSizeEstimationFactors(UncompressedBitmap ubm, boolean inclRLE) { + int numVals = ubm.getNumValues(); + int numRuns = 0; + int numOffs = 0; + int numSegs = 0; + int numSingle = 0; + + //compute size estimation factors + for (int i = 0; i < numVals; i++) { + int[] list = ubm.getOffsetsList(i); + numOffs += list.length; + numSegs += list[list.length - 1] / BitmapEncoder.BITMAP_BLOCK_SZ + 1; + numSingle += (list.length==1) ? 1 : 0; + if( inclRLE ) { + int lastOff = -2; + for (int j = 0; j < list.length; j++) { + if (list[j] != lastOff + 1) + numRuns++; + lastOff = list[j]; + } + } + } + + //construct estimation factors + return new SizeEstimationFactors(numVals, numSegs, numOffs, numRuns, numSingle); + } + + /** + * Estimates the number of bytes needed to encode this column group + * in RLE encoding format. + * + * @param numVals + * @param numRuns + * @param numCols + * @return + */ + protected static long getRLESize(int numVals, int numRuns, int numCols) { + int ret = 0; + //distinct value tuples [double per col] + ret += 8 * numVals * numCols; + //offset/len fields per distinct value tuple [2xint] + ret += 8 * numVals; + //run data [2xchar] + ret += 4 * numRuns; + return ret; + } + + /** + * Estimates the number of bytes needed to encode this column group + * in OLE format. + * + * @param numVals + * @param numOffs + * @param numSeqs + * @param numCols + * @return + */ + protected static long getOLESize(int numVals, float numOffs, int numSeqs, int numCols) { + int ret = 0; + //distinct value tuples [double per col] + ret += 8 * numVals * numCols; + //offset/len fields per distinct value tuple [2xint] + ret += 8 * numVals; + //offset list data [1xchar] + ret += 2 * numOffs; + //offset list seqment headers [1xchar] + ret += 2 * numSeqs; + return ret; + } + + /** + * + */ + protected static class SizeEstimationFactors { + protected int numVals; //num value tuples + protected int numSegs; //num OLE segments + protected int numOffs; //num OLE offsets + protected int numRuns; //num RLE runs + protected int numSingle; //num singletons + + protected SizeEstimationFactors(int numvals, int numsegs, int numoffs, int numruns, int numsingle) { + numVals = numvals; + numSegs = numsegs; + numOffs = numoffs; + numRuns = numruns; + numSingle = numsingle; + } + } +} http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/da318739/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java index 557c518..d24255d 100644 --- a/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java +++ b/src/main/java/org/apache/sysml/runtime/compress/estim/CompressedSizeEstimatorExact.java @@ -1,53 +1,53 @@ -/* - * Licensed to the Apache Software Foundation (ASF) under one - * or more contributor license agreements. See the NOTICE file - * distributed with this work for additional information - * regarding copyright ownership. The ASF licenses this file - * to you under the Apache License, Version 2.0 (the - * "License"); you may not use this file except in compliance - * with the License. You may obtain a copy of the License at - * - * http://www.apache.org/licenses/LICENSE-2.0 - * - * Unless required by applicable law or agreed to in writing, - * software distributed under the License is distributed on an - * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY - * KIND, either express or implied. See the License for the - * specific language governing permissions and limitations - * under the License. - */ - -package org.apache.sysml.runtime.compress.estim; - -import org.apache.sysml.runtime.compress.BitmapEncoder; -import org.apache.sysml.runtime.compress.UncompressedBitmap; -import org.apache.sysml.runtime.matrix.data.MatrixBlock; - -/** - * Exact compressed size estimator (examines entire dataset). - * - */ -public class CompressedSizeEstimatorExact extends CompressedSizeEstimator -{ - public CompressedSizeEstimatorExact(MatrixBlock data) { - super(data); - } - - @Override - public CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes) { - return estimateCompressedColGroupSize( - BitmapEncoder.extractBitmap(colIndexes, _data)); - } - - @Override - public CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm) - { - //compute size estimation factors - SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, true); - - //construct new size info summary - return new CompressedSizeInfo(fact.numVals, - getRLESize(fact.numVals, fact.numRuns, ubm.getNumColumns()), - getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns())); - } -} +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysml.runtime.compress.estim; + +import org.apache.sysml.runtime.compress.BitmapEncoder; +import org.apache.sysml.runtime.compress.UncompressedBitmap; +import org.apache.sysml.runtime.matrix.data.MatrixBlock; + +/** + * Exact compressed size estimator (examines entire dataset). + * + */ +public class CompressedSizeEstimatorExact extends CompressedSizeEstimator +{ + public CompressedSizeEstimatorExact(MatrixBlock data) { + super(data); + } + + @Override + public CompressedSizeInfo estimateCompressedColGroupSize(int[] colIndexes) { + return estimateCompressedColGroupSize( + BitmapEncoder.extractBitmap(colIndexes, _data)); + } + + @Override + public CompressedSizeInfo estimateCompressedColGroupSize(UncompressedBitmap ubm) + { + //compute size estimation factors + SizeEstimationFactors fact = computeSizeEstimationFactors(ubm, true); + + //construct new size info summary + return new CompressedSizeInfo(fact.numVals, + getRLESize(fact.numVals, fact.numRuns, ubm.getNumColumns()), + getOLESize(fact.numVals, fact.numOffs, fact.numSegs, ubm.getNumColumns())); + } +}
