This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 373cc44fbf [SYSTEMDS-3617] Offsets Separated
373cc44fbf is described below
commit 373cc44fbf93cff92b9cd0f0658c2910d6c90bc8
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Wed Sep 6 14:59:49 2023 +0200
[SYSTEMDS-3617] Offsets Separated
This commit separates the Offsets implementations and adds a fast path
for constructing offsets that contain no zero skip elements and byte
offsets that are ubyte.
In general this improves performance by removing if branches at almost
the lowest level of execution of all column groups that use Offsets,
and it also improves the performance of constructing such groups.
Closes #1898
---
.../runtime/compress/colgroup/offset/AOffset.java | 40 +++--
.../compress/colgroup/offset/AOffsetByte.java} | 41 +++--
.../compress/colgroup/offset/ISliceOffset.java} | 21 +--
.../compress/colgroup/offset/OffsetByte.java | 199 ++++-----------------
.../compress/colgroup/offset/OffsetByteNZ.java | 169 +++++++++++++++++
.../compress/colgroup/offset/OffsetByteUNZ.java | 170 ++++++++++++++++++
.../compress/colgroup/offset/OffsetChar.java | 17 +-
.../compress/colgroup/offset/OffsetEmpty.java | 4 +-
.../compress/colgroup/offset/OffsetFactory.java | 184 ++++++++++---------
.../compress/colgroup/offset/OffsetSingle.java | 4 +-
.../compress/colgroup/offset/OffsetTwo.java | 4 +-
.../compress/mapping/MappingPreAggregateTests.java | 2 +-
.../component/compress/offset/LargeOffsetTest.java | 39 ++--
.../compress/offset/OffsetSingleTests.java | 5 +
.../component/compress/offset/OffsetTestUtil.java | 2 +
.../component/compress/offset/OffsetTests.java | 100 ++++++++++-
.../offset/OffsetTestsDefaultConstructor.java | 11 +-
17 files changed, 673 insertions(+), 339 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
index b52d0bf375..4c10fa7fac 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffset.java
@@ -95,7 +95,7 @@ public abstract class AOffset implements Serializable {
return getIterator();
else if(row > getOffsetToLast())
return null;
- final OffsetCache c = getLength() < skipStride ? null :
cacheRow.get();
+ final OffsetCache c = getLength() < skipStride ? null :
cacheRow.get();
if(c != null && c.row == row)
return c.it.clone();
else if(getLength() < skipStride)
@@ -241,8 +241,8 @@ public abstract class AOffset implements Serializable {
}
}
- protected final void preAggregateDenseMapRow(double[] mV, int off,
double[] preAV, int cu, int nVal, AMapToData data,
- AIterator it) {
+ protected final void preAggregateDenseMapRow(double[] mV, int off,
double[] preAV, int cu, int nVal,
+ AMapToData data, AIterator it) {
final int last = getOffsetToLast();
if(cu <= last)
preAggregateDenseMapRowBellowEnd(mV, off, preAV, cu,
nVal, data, it);
@@ -250,8 +250,8 @@ public abstract class AOffset implements Serializable {
preAggregateDenseMapRowEnd(mV, off, preAV, last, nVal,
data, it);
}
- protected final void preAggregateDenseMapRowBellowEnd(final double[]
mV, final int off, final double[] preAV, int cu,
- final int nVal, final AMapToData data, final AIterator it) {
+ protected final void preAggregateDenseMapRowBellowEnd(final double[]
mV, final int off, final double[] preAV,
+ int cu, final int nVal, final AMapToData data, final AIterator
it) {
it.offset += off;
cu += off;
while(it.offset < cu) {
@@ -444,7 +444,13 @@ public abstract class AOffset implements Serializable {
return false;
}
- protected abstract AOffset moveIndex(int m);
+ /**
+ * Move the index start x cells
+ *
+ * @param m The amount to move
+ * @return The moved index.
+ */
+ public abstract AOffset moveIndex(int m);
/**
* Get the length of the underlying array. This does not reflect the
number of contained elements, since some of the
@@ -452,7 +458,7 @@ public abstract class AOffset implements Serializable {
*
* @return The length of the underlying arrays
*/
- protected abstract int getLength();
+ public abstract int getLength();
public OffsetSliceInfo slice(int l, int u) {
AIterator it = getIterator(l);
@@ -492,10 +498,8 @@ public abstract class AOffset implements Serializable {
return new OffsetSliceInfo(low, high + 1, new
OffsetSingle(lowValue));
else if(low + 1 == high)
return new OffsetSliceInfo(low, high + 1, new
OffsetTwo(lowValue, highValue));
- else if(this instanceof OffsetByte)
- return ((OffsetByte) this).slice(lowOff, highOff,
lowValue, highValue, low, high);
- else // if(this instanceof OffsetChar)
- return ((OffsetChar) this).slice(lowOff, highOff,
lowValue, highValue, low, high);
+ else
+ return ((ISliceOffset) this).slice(lowOff, highOff,
lowValue, highValue, low, high);
}
/**
@@ -562,14 +566,19 @@ public abstract class AOffset implements Serializable {
sb.append(this.getClass().getSimpleName());
final AIterator it = getIterator();
if(it != null) {
+ int i = it.offset;
final int last = getOffsetToLast();
sb.append("[");
+ sb.append(it.offset);
while(it.offset < last) {
- sb.append(it.offset);
- sb.append(", ");
it.next();
+ sb.append(", ");
+ sb.append(it.offset);
+ if(it.offset - i <= 0)
+ throw new
DMLCompressionException("Invalid offset");
+ else
+ i = it.offset;
}
- sb.append(it.offset);
sb.append("]");
if(it.offset != last)
@@ -582,7 +591,8 @@ public abstract class AOffset implements Serializable {
public static AOffset reverse(int numRows, AOffset offsets) {
if(numRows < offsets.getOffsetToLast()) {
- throw new DMLRuntimeException("Invalid number of rows
for reverse");
+ throw new DMLRuntimeException(
+ "Invalid number of rows for reverse: last: " +
offsets.getOffsetToLast() + " numRows: " + numRows);
}
int[] newOff = new int[numRows - offsets.getSize()];
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffsetByte.java
similarity index 52%
copy from
src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
copy to
src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffsetByte.java
index ea030cd9de..adbb4b9d06 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/AOffsetByte.java
@@ -16,24 +16,35 @@
* specific language governing permissions and limitations
* under the License.
*/
+package org.apache.sysds.runtime.compress.colgroup.offset;
-package org.apache.sysds.test.component.compress.offset;
+public abstract class AOffsetByte extends AOffset implements ISliceOffset {
-import org.apache.commons.lang3.NotImplementedException;
-import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
-import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
-import
org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory.OFF_TYPE;
+ private static final long serialVersionUID = -4716104973912491790L;
+ protected static final int maxV = 255;
-public class OffsetTestUtil {
+ protected final byte[] offsets;
+ protected final int offsetToFirst;
+ protected final int offsetToLast;
- public static AOffset getOffset(int[] data, OFF_TYPE type) {
- switch(type) {
- case CHAR:
- return OffsetFactory.createOffset(data, type);
- case BYTE:
- return OffsetFactory.createOffset(data, type);
- default:
- throw new NotImplementedException("not
implemented");
- }
+ protected AOffsetByte(byte[] offsets, int offsetToFirst, int
offsetToLast) {
+ this.offsets = offsets;
+ this.offsetToFirst = offsetToFirst;
+ this.offsetToLast = offsetToLast;
+ }
+
+ @Override
+ public final int getOffsetToFirst() {
+ return offsetToFirst;
+ }
+
+ @Override
+ public final int getOffsetToLast() {
+ return offsetToLast;
+ }
+
+ @Override
+ public final int getLength() {
+ return offsets.length;
}
}
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/ISliceOffset.java
similarity index 59%
copy from
src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
copy to
src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/ISliceOffset.java
index ea030cd9de..cc608931a4 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/ISliceOffset.java
@@ -17,23 +17,10 @@
* under the License.
*/
-package org.apache.sysds.test.component.compress.offset;
+package org.apache.sysds.runtime.compress.colgroup.offset;
-import org.apache.commons.lang3.NotImplementedException;
-import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
-import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
-import
org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory.OFF_TYPE;
+import
org.apache.sysds.runtime.compress.colgroup.offset.AOffset.OffsetSliceInfo;
-public class OffsetTestUtil {
-
- public static AOffset getOffset(int[] data, OFF_TYPE type) {
- switch(type) {
- case CHAR:
- return OffsetFactory.createOffset(data, type);
- case BYTE:
- return OffsetFactory.createOffset(data, type);
- default:
- throw new NotImplementedException("not
implemented");
- }
- }
+public interface ISliceOffset {
+ public OffsetSliceInfo slice(int lowOff, int highOff, int lowValue, int
highValue, int low, int high);
}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByte.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByte.java
index 77a5a04111..22d5be5987 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByte.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByte.java
@@ -23,59 +23,50 @@ import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
import org.apache.sysds.runtime.compress.colgroup.AOffsetsGroup;
import org.apache.sysds.utils.MemoryEstimates;
-public class OffsetByte extends AOffset {
+public class OffsetByte extends AOffsetByte {
private static final long serialVersionUID = -4716104973912491790L;
protected static final int maxV = 255;
- private final byte[] offsets;
- private final int offsetToFirst;
- private final int offsetToLast;
private final int size;
- private final boolean noOverHalf;
- private final boolean noZero;
-
- protected OffsetByte(byte[] offsets, int offsetToFirst, int
offsetToLast, int size, boolean noOverHalf,
- boolean noZero) {
- this.offsets = offsets;
- this.offsetToFirst = offsetToFirst;
- this.offsetToLast = offsetToLast;
- this.noOverHalf = noOverHalf;
- this.noZero = noZero;
+
+ protected OffsetByte(byte[] offsets, int offsetToFirst, int
offsetToLast, int size) {
+ super(offsets, offsetToFirst, offsetToLast);
this.size = size;
+ if(CompressedMatrixBlock.debug) {
+ this.toString();
+ }
+ }
+
+ protected static AOffsetByte create(byte[] offsets, int offsetToFirst,
int offsetToLast, int size, boolean noZero,
+ boolean ub) {
+ if(noZero) {
+ if(ub)
+ return new OffsetByteUNZ(offsets,
offsetToFirst, offsetToLast);
+ else
+ return new OffsetByteNZ(offsets, offsetToFirst,
offsetToLast);
+ }
+ else
+ return new OffsetByte(offsets, offsetToFirst,
offsetToLast, size);
}
@Override
public AIterator getIterator() {
- if(noOverHalf)
- return new IterateByteOffsetNoOverHalf();
- else if(noZero)
- return new IterateByteOffsetNoZero();
- else
- return new IterateByteOffset();
+ return new IterateByteOffset();
}
@Override
protected AIterator getIteratorFromIndexOff(int row, int dataIndex, int
offIdx) {
- if(noOverHalf)
- return new IterateByteOffsetNoOverHalf(dataIndex, row);
- else if(noZero)
- return new IterateByteOffsetNoZero(dataIndex, row);
- else
- return new IterateByteOffset(offIdx, dataIndex, row);
+ return new IterateByteOffset(offIdx, dataIndex, row);
}
@Override
public AOffsetIterator getOffsetIterator() {
- if(noOverHalf)
- return new OffsetByteIteratorNoOverHalf();
- else if(noZero)
- return new OffsetByteIteratorNoZero();
- else
- return new OffsetByteIterator();
+ return new OffsetByteIterator();
}
@Override
@@ -85,8 +76,7 @@ public class OffsetByte extends AOffset {
out.writeInt(offsets.length);
out.writeInt(offsetToLast);
out.writeInt(size);
- for(byte o : offsets)
- out.writeByte(o);
+ out.write(offsets);
}
@Override
@@ -99,16 +89,6 @@ public class OffsetByte extends AOffset {
return size;
}
- @Override
- public int getOffsetToFirst() {
- return offsetToFirst;
- }
-
- @Override
- public int getOffsetToLast() {
- return offsetToLast;
- }
-
@Override
public long getInMemorySize() {
return estimateInMemorySize(offsets.length);
@@ -127,29 +107,22 @@ public class OffsetByte extends AOffset {
final int size = in.readInt();
final byte[] offsets = new byte[offsetsLength];
+ in.readFully(offsets);
- for(int i = 0; i < offsetsLength; i++)
- offsets[i] = in.readByte();
-
- return new OffsetByte(offsets, offsetToFirst, offsetToLast,
size, OffsetFactory.getNoOverHalf(offsets),
- OffsetFactory.getNoZero(offsets));
+ return new OffsetByte(offsets, offsetToFirst, offsetToLast,
size);
}
- protected OffsetSliceInfo slice(int lowOff, int highOff, int lowValue,
int highValue, int low, int high) {
+ @Override
+ public OffsetSliceInfo slice(int lowOff, int highOff, int lowValue, int
highValue, int low, int high) {
int newSize = high - low + 1;
byte[] newOffsets = Arrays.copyOfRange(offsets, lowOff,
highOff);
- AOffset off = new OffsetByte(newOffsets, lowValue, highValue,
newSize, noOverHalf, noZero);
+ AOffset off = new OffsetByte(newOffsets, lowValue, highValue,
newSize);
return new OffsetSliceInfo(low, high + 1, off);
}
@Override
- protected AOffset moveIndex(int m) {
- return new OffsetByte(offsets, offsetToFirst - m, offsetToLast
- m, size, noOverHalf, noZero);
- }
-
- @Override
- protected int getLength() {
- return offsets.length;
+ public AOffset moveIndex(int m) {
+ return new OffsetByte(offsets, offsetToFirst - m, offsetToLast
- m, size);
}
@Override
@@ -201,8 +174,7 @@ public class OffsetByte extends AOffset {
}
final int offLast = s * (g.length - 1) + g[g.length -
1].getOffsets().getOffsetToLast();
- return new OffsetByte(ret, offsetToFirst, offLast, size,
OffsetFactory.getNoOverHalf(ret),
- OffsetFactory.getNoZero(ret));
+ return new OffsetByte(ret, offsetToFirst, offLast, size);
}
private class IterateByteOffset extends AIterator {
@@ -263,99 +235,6 @@ public class OffsetByte extends AOffset {
}
}
- private class IterateByteOffsetNoZero extends AIterator {
-
- protected int index;
-
- private IterateByteOffsetNoZero() {
- super(offsetToFirst);
- }
-
- private IterateByteOffsetNoZero(int index, int offset) {
- super(offset);
- this.index = index;
- }
-
- @Override
- public int next() {
- byte v = offsets[index];
- offset += v & 0xFF;
- index++;
- return offset;
- }
-
- @Override
- public int skipTo(int idx) {
- while(offset < idx && index < offsets.length)
- next();
-
- return offset;
- }
-
- @Override
- public IterateByteOffsetNoZero clone() {
- return new IterateByteOffsetNoZero(index, offset);
- }
-
- @Override
- public int getDataIndex() {
- return index;
- }
-
- @Override
- public int getOffsetsIndex() {
- return index;
- }
- }
-
- private class IterateByteOffsetNoOverHalf extends
IterateByteOffsetNoZero {
-
- private IterateByteOffsetNoOverHalf() {
- super();
- }
-
- private IterateByteOffsetNoOverHalf(int index, int offset) {
- super(index, offset);
- }
-
- @Override
- public final int next() {
- offset += offsets[index];
- index++;
- return offset;
- }
-
- @Override
- public final int skipTo(int idx) {
- while(offset < idx && index < offsets.length) {
- offset += offsets[index];
- index++;
- }
-
- return offset;
- }
-
- @Override
- public final IterateByteOffsetNoOverHalf clone() {
- return new IterateByteOffsetNoOverHalf(index, offset);
- }
- }
-
- private class OffsetByteIteratorNoOverHalf extends AOffsetIterator {
-
- protected int index;
-
- private OffsetByteIteratorNoOverHalf() {
- super(offsetToFirst);
- index = 0;
- }
-
- @Override
- public int next() {
- return offset += offsets[index++];
- }
- }
-
private class OffsetByteIterator extends AOffsetIterator {
protected int index;
@@ -378,18 +257,4 @@ public class OffsetByte extends AOffset {
}
}
- private class OffsetByteIteratorNoZero extends AOffsetIterator {
-
- protected int index;
-
- private OffsetByteIteratorNoZero() {
- super(offsetToFirst);
- index = 0;
- }
-
- @Override
- public int next() {
- return offset += offsets[index++] & 0xFF;
- }
- }
}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByteNZ.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByteNZ.java
new file mode 100644
index 0000000000..5999675b86
--- /dev/null
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByteNZ.java
@@ -0,0 +1,169 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sysds.runtime.compress.colgroup.offset;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.utils.MemoryEstimates;
+
+public class OffsetByteNZ extends AOffsetByte {
+
+ private static final long serialVersionUID = -4716104973912291890L;
+
+ protected OffsetByteNZ(byte[] offsets, int offsetToFirst, int
offsetToLast) {
+ super(offsets, offsetToFirst, offsetToLast);
+
+ if(CompressedMatrixBlock.debug) {
+ this.toString();
+ }
+ }
+
+ @Override
+ public AIterator getIterator() {
+ return new IterateByteOffsetNoZero();
+ }
+
+ @Override
+ protected AIterator getIteratorFromIndexOff(int row, int dataIndex, int
offIdx) {
+ return new IterateByteOffsetNoZero(dataIndex, row);
+ }
+
+ @Override
+ public AOffsetIterator getOffsetIterator() {
+ return new OffsetByteIteratorNoZero();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+
out.writeByte(OffsetFactory.OFF_TYPE_SPECIALIZATIONS.BYTENZ.ordinal());
+ out.writeInt(offsetToFirst);
+ out.writeInt(offsets.length);
+ out.writeInt(offsetToLast);
+ out.write(offsets);
+ }
+
+ @Override
+ public long getExactSizeOnDisk() {
+ return 1 + 4 + 4 + 4 + offsets.length;
+ }
+
+ @Override
+ public int getSize() {
+ return offsets.length + 1;
+ }
+
+ @Override
+ public long getInMemorySize() {
+ return estimateInMemorySize(offsets.length);
+ }
+
+ public static long estimateInMemorySize(int nOffs) {
+ long size = 16 + 4 + 4 + 8; // object header plus int plus
reference
+ size += MemoryEstimates.byteArrayCost(nOffs);
+ return size;
+ }
+
+ public static AOffsetByte readFields(DataInput in) throws IOException {
+ final int offsetToFirst = in.readInt();
+ final int offsetsLength = in.readInt();
+ final int offsetToLast = in.readInt();
+
+ final byte[] offsets = new byte[offsetsLength];
+ in.readFully(offsets);
+
+ return new OffsetByteNZ(offsets, offsetToFirst, offsetToLast);
+ }
+
+ @Override
+ public OffsetSliceInfo slice(int lowOff, int highOff, int lowValue, int
highValue, int low, int high) {
+ byte[] newOffsets = Arrays.copyOfRange(offsets, lowOff,
highOff);
+ AOffset off = new OffsetByteNZ(newOffsets, lowValue, highValue);
+ return new OffsetSliceInfo(low, high + 1, off);
+ }
+
+ @Override
+ public AOffset moveIndex(int m) {
+ return new OffsetByteNZ(offsets, offsetToFirst - m,
offsetToLast - m);
+ }
+
+ private class IterateByteOffsetNoZero extends AIterator {
+
+ protected int index;
+
+ private IterateByteOffsetNoZero() {
+ super(offsetToFirst);
+ }
+
+ private IterateByteOffsetNoZero(int index, int offset) {
+ super(offset);
+ this.index = index;
+ }
+
+ @Override
+ public int next() {
+ byte v = offsets[index];
+ offset += v & 0xFF;
+ index++;
+ return offset;
+ }
+
+ @Override
+ public int skipTo(int idx) {
+ while(offset < idx && index < offsets.length)
+ next();
+
+ return offset;
+ }
+
+ @Override
+ public IterateByteOffsetNoZero clone() {
+ return new IterateByteOffsetNoZero(index, offset);
+ }
+
+ @Override
+ public int getDataIndex() {
+ return index;
+ }
+
+ @Override
+ public int getOffsetsIndex() {
+ return index;
+ }
+ }
+
+ private class OffsetByteIteratorNoZero extends AOffsetIterator {
+
+ protected int index;
+
+ private OffsetByteIteratorNoZero() {
+ super(offsetToFirst);
+ index = 0;
+ }
+
+ @Override
+ public int next() {
+ return offset += offsets[index++] & 0xFF;
+ }
+ }
+
+}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByteUNZ.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByteUNZ.java
new file mode 100644
index 0000000000..d476fd4dbc
--- /dev/null
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetByteUNZ.java
@@ -0,0 +1,170 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.sysds.runtime.compress.colgroup.offset;
+
+import java.io.DataInput;
+import java.io.DataOutput;
+import java.io.IOException;
+import java.util.Arrays;
+
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
+import org.apache.sysds.utils.MemoryEstimates;
+
+public class OffsetByteUNZ extends AOffsetByte {
+
+ private static final long serialVersionUID = -4716104973912299990L;
+
+ protected OffsetByteUNZ(byte[] offsets, int offsetToFirst, int
offsetToLast) {
+ super(offsets, offsetToFirst, offsetToLast);
+
+ if(CompressedMatrixBlock.debug) {
+ this.toString();
+ }
+ }
+
+ @Override
+ public AIterator getIterator() {
+ return new IterateByteOffsetNoOverHalf();
+ }
+
+ @Override
+ protected AIterator getIteratorFromIndexOff(int row, int dataIndex, int
offIdx) {
+ return new IterateByteOffsetNoOverHalf(dataIndex, row);
+ }
+
+ @Override
+ public AOffsetIterator getOffsetIterator() {
+ return new OffsetByteIteratorNoOverHalf();
+ }
+
+ @Override
+ public void write(DataOutput out) throws IOException {
+
out.writeByte(OffsetFactory.OFF_TYPE_SPECIALIZATIONS.BYTEUNZ.ordinal());
+ out.writeInt(offsetToFirst);
+ out.writeInt(offsets.length);
+ out.writeInt(offsetToLast);
+ out.write(offsets);
+ }
+
+ @Override
+ public long getExactSizeOnDisk() {
+ return 1 + 4 + 4 + 4 + offsets.length;
+ }
+
+ @Override
+ public int getSize() {
+ return offsets.length + 1;
+ }
+
+ @Override
+ public long getInMemorySize() {
+ return estimateInMemorySize(offsets.length);
+ }
+
+ public static long estimateInMemorySize(int nOffs) {
+ long size = 16 + 4 + 4 + 8; // object header plus int plus
reference
+ size += MemoryEstimates.byteArrayCost(nOffs);
+ return size;
+ }
+
+ public static AOffsetByte readFields(DataInput in) throws IOException {
+ final int offsetToFirst = in.readInt();
+ final int offsetsLength = in.readInt();
+ final int offsetToLast = in.readInt();
+
+ final byte[] offsets = new byte[offsetsLength];
+ in.readFully(offsets);
+
+ return new OffsetByteUNZ(offsets, offsetToFirst, offsetToLast);
+ }
+
+ @Override
+ public OffsetSliceInfo slice(int lowOff, int highOff, int lowValue, int
highValue, int low, int high) {
+ byte[] newOffsets = Arrays.copyOfRange(offsets, lowOff,
highOff);
+ AOffset off = new OffsetByteUNZ(newOffsets, lowValue,
highValue);
+ return new OffsetSliceInfo(low, high + 1, off);
+ }
+
+ @Override
+ public AOffset moveIndex(int m) {
+ return new OffsetByteUNZ(offsets, offsetToFirst - m,
offsetToLast - m);
+ }
+
+ private class IterateByteOffsetNoOverHalf extends AIterator {
+
+ protected int index;
+
+ private IterateByteOffsetNoOverHalf() {
+ super(offsetToFirst);
+ }
+
+ private IterateByteOffsetNoOverHalf(int index, int offset) {
+ super(offset);
+ this.index = index;
+ }
+
+ @Override
+ public final int next() {
+ offset += offsets[index];
+ index++;
+ return offset;
+ }
+
+ @Override
+ public final int skipTo(int idx) {
+ while(offset < idx && index < offsets.length) {
+ offset += offsets[index];
+ index++;
+ }
+
+ return offset;
+ }
+
+ @Override
+ public int getDataIndex() {
+ return index;
+ }
+
+ @Override
+ public int getOffsetsIndex() {
+ return index;
+ }
+
+ @Override
+ public final IterateByteOffsetNoOverHalf clone() {
+ return new IterateByteOffsetNoOverHalf(index, offset);
+ }
+ }
+
+ private class OffsetByteIteratorNoOverHalf extends AOffsetIterator {
+
+ protected int index;
+
+ private OffsetByteIteratorNoOverHalf() {
+ super(offsetToFirst);
+ index = 0;
+ }
+
+ @Override
+ public int next() {
+ return offset += offsets[index++];
+ }
+ }
+
+}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetChar.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetChar.java
index af60db7cc3..3f37ddac9c 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetChar.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetChar.java
@@ -23,9 +23,10 @@ import java.io.DataOutput;
import java.io.IOException;
import java.util.Arrays;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
import org.apache.sysds.utils.MemoryEstimates;
-public class OffsetChar extends AOffset {
+public class OffsetChar extends AOffset implements ISliceOffset{
private static final long serialVersionUID = -1192266421395964882L;
protected static final int maxV = Character.MAX_VALUE;
@@ -40,6 +41,9 @@ public class OffsetChar extends AOffset {
this.offsetToFirst = offsetToFirst;
this.offsetToLast = offsetToLast;
this.noZero = noZero;
+ if(CompressedMatrixBlock.debug){
+ this.toString();
+ }
}
@Override
@@ -127,21 +131,22 @@ public class OffsetChar extends AOffset {
OffsetFactory.getNoZero(offsets);
return new OffsetChar(offsets, offsetToFirst, offsetToLast,
OffsetFactory.getNoZero(offsets));
- }
-
- protected OffsetSliceInfo slice(int lowOff, int highOff, int lowValue,
int highValue, int low, int high) {
+ }
+
+ @Override
+ public OffsetSliceInfo slice(int lowOff, int highOff, int lowValue, int
highValue, int low, int high) {
char[] newOffsets = Arrays.copyOfRange(offsets, lowOff,
highOff);
AOffset off = new OffsetChar(newOffsets, lowValue, highValue,
noZero);
return new OffsetSliceInfo(low, high + 1, off);
}
@Override
- protected AOffset moveIndex(int m) {
+ public AOffset moveIndex(int m) {
return new OffsetChar(offsets, offsetToFirst - m, offsetToLast
- m, noZero);
}
@Override
- protected int getLength() {
+ public int getLength() {
return offsets.length;
}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetEmpty.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetEmpty.java
index 863b9cd6f4..67a6ad55d2 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetEmpty.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetEmpty.java
@@ -90,12 +90,12 @@ public class OffsetEmpty extends AOffset {
}
@Override
- protected AOffset moveIndex(int m) {
+ public AOffset moveIndex(int m) {
return this;
}
@Override
- protected int getLength() {
+ public int getLength() {
return 0;
}
}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetFactory.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetFactory.java
index 9097c0bc40..2d2cfb874c 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetFactory.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetFactory.java
@@ -38,12 +38,16 @@ public final class OffsetFactory {
/** The specific underlying types of offsets. */
public enum OFF_TYPE {
- BYTE, CHAR
+ UBYTE, BYTE, CHAR
}
/** Specialized types of underlying offsets. */
public enum OFF_TYPE_SPECIALIZATIONS {
- BYTE, CHAR, SINGLE_OFFSET, TWO_OFFSET, EMPTY
+ BYTE, CHAR, SINGLE_OFFSET, TWO_OFFSET, EMPTY,
+ /** unsigned Byte no zero */
+ BYTEUNZ,
+ /** Byte no zero */
+ BYTENZ,
}
/**
@@ -133,13 +137,14 @@ public final class OffsetFactory {
for(int i = apos + 1; i < alen; i++) {
if(indexes[i] <= indexes[i - 1]) {
String message = "Invalid input to
create offset, all values should be continuously increasing.\n";
- message += "Index " + (i - 1) + " and
Index " + i + " are wrong with values: " + indexes[i - 1] + " and "
- + indexes[i];
+ message += "Index " + (i - 1) + " and
Index " + i + " are wrong with values: " + indexes[i - 1]
+ + " and " + indexes[i];
throw new
DMLCompressionException(message, e);
}
}
throw new DMLCompressionException(
- "Failed to create offset with input:" +
Arrays.toString(indexes) + " Apos: " + apos + " Alen: " + alen, e);
+ "Failed to create offset with input:" +
Arrays.toString(indexes) + " Apos: " + apos + " Alen: " + alen,
+ e);
}
}
@@ -173,6 +178,10 @@ public final class OffsetFactory {
return OffsetSingle.readFields(in);
case TWO_OFFSET:
return OffsetTwo.readFields(in);
+ case BYTEUNZ:
+ return OffsetByteUNZ.readFields(in);
+ case BYTENZ:
+ return OffsetByteNZ.readFields(in);
case BYTE:
return OffsetByte.readFields(in);
case CHAR:
@@ -221,111 +230,122 @@ public final class OffsetFactory {
}
private static AOffset createByte(int[] indexes, int apos, int alen) {
- final int indexesLength = alen - apos;
+ final int endSize = calcSize(indexes, apos, alen,
OffsetByte.maxV);
+ final int offsetToFirst = indexes[apos];
+ final int offsetToLast = indexes[alen - 1];
+ final boolean noZero = endSize == alen - apos - 1;
+ final byte[] offsets = new byte[endSize];
- int endSize = 0;
- int offsetToFirst = indexes[apos];
- int offsetToLast = indexes[alen - 1];
int ov = offsetToFirst;
- // find the size of the array
- for(int i = apos + 1; i < alen; i++) {
- final int nv = indexes[i];
- endSize += 1 + (nv - ov - 1) / OffsetByte.maxV;
- ov = nv;
- }
-
- boolean noZero = endSize == indexesLength - 1;
- byte[] offsets = new byte[endSize];
- ov = offsetToFirst;
int p = 0;
- // populate the array
- for(int i = apos + 1; i < alen; i++) {
- final int nv = indexes[i];
- final int offsetSize = nv - ov;
- if(offsetSize <= 0)
- throw new DMLCompressionException("Invalid
offset");
- final int div = offsetSize / OffsetByte.maxV;
- final int mod = offsetSize % OffsetByte.maxV;
- if(mod == 0) {
- p += div - 1; // skip values
- offsets[p++] = (byte) OffsetByte.maxV;
- }
- else {
- p += div; // skip values
- offsets[p++] = (byte) (mod);
+ if(noZero) {
+ final int mp1 = (OffsetByte.maxV + 1);
+ for(int i = apos + 1; i < alen; i++) {
+ final int nv = indexes[i];
+ final int offsetSize = nv - ov;
+ if(offsetSize <= 0)
+ throw new
DMLCompressionException("invalid offset construction with negative sequences");
+ final byte mod = (byte) (offsetSize % mp1);
+ offsets[p++] = mod;
+ ov = nv;
}
-
- ov = nv;
}
+ else {
+ final byte max = (byte) OffsetByte.maxV;
+ // populate the array
+ for(int i = apos + 1; i < alen; i++) {
+ final int nv = indexes[i];
+ final int offsetSize = nv - ov;
+ final int div = offsetSize / OffsetByte.maxV;
+ final byte mod = (byte) (offsetSize %
OffsetByte.maxV);
+ if(mod == 0) {
+ p += div - 1; // skip values
+ offsets[p++] = max;
+ }
+ else {
+ p += div; // skip values
+ offsets[p++] = mod;
+ }
+ ov = nv;
+ }
+ }
boolean noOverHalf = getNoOverHalf(offsets);
- return new OffsetByte(offsets, offsetToFirst, offsetToLast,
indexesLength, noOverHalf, noZero);
- }
+ return OffsetByte.create(offsets, offsetToFirst, offsetToLast,
alen - apos, noZero, noOverHalf);
- private static AOffset createChar(int[] indexes, int apos, int alen) {
+ }
+ private static int calcSize(int[] indexes, int apos, int alen, int
offMax) {
int endSize = 0;
- int offsetToFirst = indexes[apos];
- int offsetToLast = indexes[alen - 1];
- int ov = offsetToFirst;
+ int ov = indexes[apos];
+ // find the size of the array
for(int i = apos + 1; i < alen; i++) {
final int nv = indexes[i];
- endSize += 1 + (nv - ov - 1) / OffsetChar.maxV;
+ endSize += 1 + (nv - ov - 1) / offMax;
ov = nv;
}
- boolean noZero = endSize == alen - apos - 1;
- char[] offsets = new char[endSize];
- ov = offsetToFirst;
+ return endSize;
+ }
+
+ private static AOffset createChar(int[] indexes, int apos, int alen) {
+
+ final int endSize = calcSize(indexes, apos, alen,
OffsetChar.maxV);
+ final int offsetToFirst = indexes[apos];
+ final int offsetToLast = indexes[alen - 1];
+ final boolean noZero = endSize == alen - apos - 1;
+ final char[] offsets = new char[endSize];
+
+ int ov = offsetToFirst;
int p = 0;
- for(int i = apos + 1; i < alen; i++) {
- final int nv = indexes[i];
- final int offsetSize = (nv - ov);
- if(offsetSize <= 0)
- throw new DMLCompressionException("Invalid
offset");
- final int div = offsetSize / OffsetChar.maxV;
- final int mod = offsetSize % OffsetChar.maxV;
- if(mod == 0) {
- p += div - 1; // skip values
- offsets[p++] = (char) OffsetChar.maxV;
- }
- else {
- p += div; // skip values
+
+ final int mp1 = (OffsetChar.maxV + 1);
+ if(noZero) {
+ for(int i = apos + 1; i < alen; i++) {
+ final int nv = indexes[i];
+ final int offsetSize = (nv - ov);
+ if(offsetSize <= 0)
+ throw new
DMLCompressionException("invalid offset construction with negative sequences");
+ final int mod = offsetSize % mp1;
offsets[p++] = (char) (mod);
+ ov = nv;
}
- ov = nv;
}
- return new OffsetChar(offsets, offsetToFirst, offsetToLast,
noZero);
- }
+ else {
- protected static boolean getNoOverHalf(byte[] off) {
- boolean noOverHalf = true;
- for(byte b : off)
- if(b < 1) {
- noOverHalf = false;
- break;
+ // populate the array
+ for(int i = apos + 1; i < alen; i++) {
+ final int nv = indexes[i];
+ final int offsetSize = (nv - ov);
+ final int div = offsetSize / OffsetChar.maxV;
+ final int mod = offsetSize % OffsetChar.maxV;
+ if(mod == 0) {
+ p += div - 1; // skip values
+ offsets[p++] = (char) OffsetChar.maxV;
+ }
+ else {
+ p += div; // skip values
+ offsets[p++] = (char) (mod);
+ }
+ ov = nv;
}
- return noOverHalf;
+ }
+
+ return new OffsetChar(offsets, offsetToFirst, offsetToLast,
noZero);
}
- protected static boolean getNoZero(byte[] off) {
- boolean noZero = true;
+ protected static boolean getNoOverHalf(byte[] off) {
for(byte b : off)
- if(b == 0) {
- noZero = false;
- break;
- }
- return noZero;
+ if(b < (byte)1)
+ return false;
+ return true;
}
protected static boolean getNoZero(char[] off) {
- boolean noZero = true;
for(char b : off)
- if(b == 0) {
- noZero = false;
- break;
- }
- return noZero;
+ if(b == 0)
+ return false;
+ return true;
}
}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetSingle.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetSingle.java
index 0c88d98096..d77fec4a25 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetSingle.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetSingle.java
@@ -99,12 +99,12 @@ public class OffsetSingle extends AOffset {
}
@Override
- protected AOffset moveIndex(int m) {
+ public AOffset moveIndex(int m) {
return new OffsetSingle(off - m);
}
@Override
- protected int getLength() {
+ public int getLength() {
return 1;
}
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetTwo.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetTwo.java
index 370dbc4e44..bd1545f8c7 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetTwo.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/offset/OffsetTwo.java
@@ -109,12 +109,12 @@ public class OffsetTwo extends AOffset {
}
@Override
- protected AOffset moveIndex(int m) {
+ public AOffset moveIndex(int m) {
return new OffsetTwo(first - m, last - m);
}
@Override
- protected int getLength() {
+ public int getLength() {
return 2;
}
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java
b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java
index 8ef2bdffb8..1c2d105251 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java
+++
b/src/test/java/org/apache/sysds/test/component/compress/mapping/MappingPreAggregateTests.java
@@ -308,7 +308,7 @@ public class MappingPreAggregateTests {
private static final long serialVersionUID =
1910028460503867232L;
private OneOffset(byte[] offsets, int offsetToFirst, int
offsetToLast, int length) {
- super(offsets, offsetToFirst, offsetToLast, length,
false, false);
+ super(offsets, offsetToFirst, offsetToLast, length);
}
protected static OneOffset create(int length) {
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/offset/LargeOffsetTest.java
b/src/test/java/org/apache/sysds/test/component/compress/offset/LargeOffsetTest.java
index de0d5cbe8c..4e6c934092 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/offset/LargeOffsetTest.java
+++
b/src/test/java/org/apache/sysds/test/component/compress/offset/LargeOffsetTest.java
@@ -26,6 +26,7 @@ import java.util.Collection;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
import
org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory.OFF_TYPE;
@@ -50,18 +51,19 @@ public class LargeOffsetTest {
ArrayList<Object[]> tests = new ArrayList<>();
// It is assumed that the input is in sorted order, all values
are positive and there are no duplicates.
for(OFF_TYPE t : OFF_TYPE.values()) {
- for(int i = 0; i < 4; i ++){
+ for(int i = 0; i < 4; i++) {
// tests.add(new Object[]{gen(100, 10, i),t});
// tests.add(new Object[]{gen(1000, 10, i),t});
- tests.add(new Object[]{gen(3030, 10, i),t});
- tests.add(new Object[]{gen(3030, 300, i),t});
- tests.add(new Object[]{gen(10000, 501, i),t});
+ tests.add(new Object[] {gen(3030, 10, i), t});
+ tests.add(new Object[] {gen(3030, 300, i), t});
+ tests.add(new Object[] {gen(10000, 501, i), t});
}
}
return tests;
}
public LargeOffsetTest(int[] data, OFF_TYPE type) {
+ CompressedMatrixBlock.debug = true;
this.data = data;
this.type = type;
this.o = OffsetTestUtil.getOffset(data, type);
@@ -79,8 +81,8 @@ public class LargeOffsetTest {
}
@Test
- public void IteratorAtStart(){
- try{
+ public void IteratorAtStart() {
+ try {
int idx = data.length / 3;
AIterator it = o.getIterator(data[idx]);
compare(it, data, idx);
@@ -92,8 +94,8 @@ public class LargeOffsetTest {
}
@Test
- public void IteratorAtMiddle(){
- try{
+ public void IteratorAtMiddle() {
+ try {
int idx = data.length / 2;
AIterator it = o.getIterator(data[idx]);
compare(it, data, idx);
@@ -105,8 +107,8 @@ public class LargeOffsetTest {
}
@Test
- public void IteratorAtEnd(){
- try{
+ public void IteratorAtEnd() {
+ try {
int idx = data.length / 4 * 3;
AIterator it = o.getIterator(data[idx]);
compare(it, data, idx);
@@ -117,21 +119,20 @@ public class LargeOffsetTest {
}
}
- private static void compare(AIterator it, int[] data, int off){
- for(; off< data.length; off++){
- assertEquals(data[off] , it.value());
- if(off +1 < data.length)
- it.next();
+ private static void compare(AIterator it, int[] data, int off) {
+ for(; off < data.length; off++) {
+ assertEquals(data[off], it.value());
+ if(off + 1 < data.length)
+ it.next();
}
}
-
- private static int[] gen(int size, int maxSkip, int seed){
+ private static int[] gen(int size, int maxSkip, int seed) {
int[] of = new int[size];
Random r = new Random(seed);
of[0] = r.nextInt(maxSkip);
- for(int i = 1; i < size; i ++){
- of[i] = r.nextInt(maxSkip) + of[i-1] + 1;
+ for(int i = 1; i < size; i++) {
+ of[i] = r.nextInt(maxSkip) + of[i - 1] + 1;
}
return of;
}
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetSingleTests.java
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetSingleTests.java
index de30bbe9b1..537a445d35 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetSingleTests.java
+++
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetSingleTests.java
@@ -23,6 +23,7 @@ import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertFalse;
import static org.junit.Assert.assertTrue;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
import org.apache.sysds.runtime.compress.DMLCompressionException;
import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
import org.apache.sysds.runtime.compress.colgroup.offset.OffsetEmpty;
@@ -33,6 +34,10 @@ import org.junit.Test;
public class OffsetSingleTests {
+ static {
+ CompressedMatrixBlock.debug = true;
+ }
+
@Test
public void testEmptyEstimateMemory() {
assertTrue(OffsetFactory.estimateInMemorySize(0, 10000) == 16);
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
index ea030cd9de..420f26fa29 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
+++
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestUtil.java
@@ -32,6 +32,8 @@ public class OffsetTestUtil {
return OffsetFactory.createOffset(data, type);
case BYTE:
return OffsetFactory.createOffset(data, type);
+ case UBYTE:
+ return OffsetFactory.createOffset(data, type);
default:
throw new NotImplementedException("not
implemented");
}
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
index 2d5f09f4b3..d98375e83a 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
+++
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTests.java
@@ -33,9 +33,11 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
+import java.util.Random;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
import org.apache.sysds.runtime.compress.DMLCompressionException;
import org.apache.sysds.runtime.compress.colgroup.AOffsetsGroup;
import org.apache.sysds.runtime.compress.colgroup.offset.AIterator;
@@ -98,6 +100,8 @@ public class OffsetTests {
tests.add(new Object[] {new int[] {0, 255 * 3}, t});
tests.add(new Object[] {new int[] {0, 255 * 4}, t});
tests.add(new Object[] {new int[] {0, 256 * 3}, t});
+ tests.add(new Object[] {new int[] {0, 100, 200, 300,
400, 500, 600}, t});
+ tests.add(new Object[] {new int[] {0, 200, 400, 600,
800, 1000, 1200}, t});
tests.add(new Object[] {new int[] {255 * 3, 255 * 5},
t});
tests.add(new Object[] {new int[] {1000000, 1000000 +
255 * 5}, t});
tests.add(new Object[] {new int[] {100000000, 100000000
+ 255 * 5}, t});
@@ -106,6 +110,17 @@ public class OffsetTests {
tests.add(new Object[] {new int[] {0, 1, 2, 3, 4, 5},
t});
tests.add(new Object[] {new int[] {2458248, 2458249,
2458253, 2458254, 2458256, 2458257, 2458258, 2458262,
2458264, 2458266, 2458267, 2458271, 2458272,
2458275, 2458276, 2458281}, t});
+
+ tests.add(new Object[] {gen(100, 100, 1), t});
+ for(int i = 0; i < 10; i++) {
+ tests.add(new Object[] {gen(100, 200, i), t});
+ tests.add(new Object[] {gen(100, 250, i +
10230), t});
+ tests.add(new Object[] {gen(100, 4, i + 120),
t});
+ tests.add(new Object[] {gen(100, 350, i +
1030), t});
+ tests.add(new Object[] {gen(30, 1000, i +
101420), t});
+ tests.add(new Object[] {gen(30, 3000, i +
101420), t});
+
+ }
}
tests.add(new Object[] {new int[] {Character.MAX_VALUE, ((int)
Character.MAX_VALUE) * 2}, OFF_TYPE.CHAR});
tests.add(new Object[] {new int[] {0, Character.MAX_VALUE,
((int) Character.MAX_VALUE) * 2}, OFF_TYPE.CHAR});
@@ -114,7 +129,20 @@ public class OffsetTests {
return tests;
}
+ private static int[] gen(int i, int j, int seed) {
+ int[] a = new int[i];
+ Random r = new Random(seed);
+ int o = r.nextInt(j);
+ a[0] = o;
+ for(int k = 1; k < i; k++) {
+ o += r.nextInt(j) + 1;
+ a[k] = o;
+ }
+ return a;
+ }
+
public OffsetTests(int[] data, OFF_TYPE type) {
+ CompressedMatrixBlock.debug = true;
this.data = data;
this.type = type;
this.o = OffsetTestUtil.getOffset(data, type);
@@ -240,11 +268,14 @@ public class OffsetTests {
switch(type) {
case BYTE:
- final int correctionByte =
OffsetFactory.correctionByte(data[data.length - 1] - data[0], data.length);
+ case UBYTE:
+ final int correctionByte =
OffsetFactory.correctionByte(data[data.length - 1] - data[0],
+ data.length);
estimatedSize =
OffsetByte.estimateInMemorySize(data.length + correctionByte);
break;
case CHAR:
- final int correctionChar =
OffsetFactory.correctionChar(data[data.length - 1] - data[0], data.length);
+ final int correctionChar =
OffsetFactory.correctionChar(data[data.length - 1] - data[0],
+ data.length);
estimatedSize =
OffsetChar.estimateInMemorySize(data.length + correctionChar);
break;
default:
@@ -631,20 +662,41 @@ public class OffsetTests {
}
}
+ @Test
+ public void compareAppend_v3() {
+ if(data.length > 0) {
+ final int ll = data[data.length - 1] + 1000;
+ final AOffset r1 = o.appendN(new AOffsetsGroup[] {new
Con(o), new Con(o)}, ll);
+ final AOffset r2 = o.append(o, ll);
+ compare(r1, r2);
+ }
+ }
+
+ @Test
+ public void compareAppend_ot() {
+ if(data.length > 0) {
+ final int ll = data[data.length - 1] + 1000;
+ final AOffset r1 = o.appendN(new AOffsetsGroup[] {new
Con(o), new Con(OffsetFactory.createOffset(data))},
+ ll);
+ final AOffset r2 = o.append(o, ll);
+ compare(r1, r2);
+ }
+ }
+
@Test
public void compareAppend_2x() {
- try{
+ try {
if(data.length > 0) {
-
+
final int ll = data[data.length - 1] + 100;
final AOffset r = o.appendN(new AOffsetsGroup[]
{new Con(o), new Con(o), new Con(o)}, ll);
final AOffset t2 = o.append(o, ll).append(o, ll
* 2);
compare(r, t2);
}
}
- catch(Exception e){
+ catch(Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
@@ -653,17 +705,17 @@ public class OffsetTests {
@Test
public void compareAppend_2x_v2() {
- try{
+ try {
if(data.length > 0) {
-
+
final int ll = data[data.length - 1] + 280;
final AOffset r = o.appendN(new AOffsetsGroup[]
{new Con(o), new Con(o), new Con(o)}, ll);
final AOffset t2 = o.append(o, ll).append(o, ll
* 2);
compare(r, t2);
}
}
- catch(Exception e){
+ catch(Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
@@ -693,6 +745,38 @@ public class OffsetTests {
}
}
+ @Test
+ public void moveIndex1() {
+
+ AOffset b = o.moveIndex(10);
+ compareMoved(b, data, -10);
+ }
+
+ @Test
+ public void getLength(){
+ assertTrue(o.getLength() + 1 >= data.length);
+ }
+
+
+ private void compareMoved(AOffset o, int[] v, int m) {
+ AIterator i = o.getIterator();
+ if(o.getSize() != v.length) {
+ fail("Incorrect result sizes : " + o + " " +
Arrays.toString(v));
+ }
+ if(o.getSize() > 0) {
+ assertEquals(o.getOffsetToLast(), v[v.length - 1] + m);
+ if(v[0] + m != i.value())
+ fail("incorrect result using : " +
o.getClass().getSimpleName() + " expected: " + Arrays.toString(v)
+ + " but was :" + o.toString());
+ for(int j = 1; j < v.length; j++) {
+ i.next();
+ if(v[j] + m != i.value())
+ fail("incorrect result using : " +
o.getClass().getSimpleName() + " expected: " + Arrays.toString(v)
+ + " but was :" + o.toString());
+ }
+ }
+ }
+
private class Con implements AOffsetsGroup {
AOffset a;
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestsDefaultConstructor.java
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestsDefaultConstructor.java
index ea9017df54..179ece9b7a 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestsDefaultConstructor.java
+++
b/src/test/java/org/apache/sysds/test/component/compress/offset/OffsetTestsDefaultConstructor.java
@@ -26,6 +26,7 @@ import java.util.Collection;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.runtime.compress.CompressedMatrixBlock;
import org.apache.sysds.runtime.compress.colgroup.offset.AOffset;
import org.apache.sysds.runtime.compress.colgroup.offset.OffsetFactory;
import org.junit.Test;
@@ -39,6 +40,10 @@ public class OffsetTestsDefaultConstructor {
private static final long sizeTolerance = 100;
+ static {
+ CompressedMatrixBlock.debug = true;
+ }
+
public int[] data;
private AOffset o;
@@ -106,11 +111,11 @@ public class OffsetTestsDefaultConstructor {
}
@Test
- public void testMemoryEstimate(){
- final long est =
OffsetFactory.estimateInMemorySize(data.length, data[data.length -1]);
+ public void testMemoryEstimate() {
+ final long est =
OffsetFactory.estimateInMemorySize(data.length, data[data.length - 1]);
final long act = o.getInMemorySize();
- if(!( act <= est + sizeTolerance))
+ if(!(act <= est + sizeTolerance))
fail("In memory is not smaller than estimate " + est +
" " + act);
}
}