This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new f094eaca6c [SYSTEMDS-3589] Frame single column ragged array
f094eaca6c is described below
commit f094eaca6c1a56652bf70ed29b368ebe5dcf856f
Author: Olga Ovcharenko <[email protected]>
AuthorDate: Mon Aug 21 17:07:28 2023 +0200
[SYSTEMDS-3589] Frame single column ragged array
This commit adds a simple ragged array that lets us allocate frame
columns which materialize only their leading values, i.e. fewer values
than the column's logical number of rows.
Closes #1857
Closes #1884
---
.../sysds/runtime/frame/data/FrameBlock.java | 2 +-
.../sysds/runtime/frame/data/columns/Array.java | 22 +-
.../runtime/frame/data/columns/ArrayFactory.java | 8 +-
.../runtime/frame/data/columns/CharArray.java | 2 +-
.../runtime/frame/data/columns/OptionalArray.java | 4 +-
.../runtime/frame/data/columns/RaggedArray.java | 238 +++++++++++++++++----
.../runtime/frame/data/columns/StringArray.java | 5 +-
.../component/frame/array/FrameArrayTests.java | 185 ++++++++++------
8 files changed, 346 insertions(+), 120 deletions(-)
diff --git a/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java
b/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java
index b9c748481a..513b788b60 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/FrameBlock.java
@@ -861,7 +861,7 @@ public class FrameBlock implements CacheBlock<FrameBlock>,
Externalizable {
try {
size += pool.submit(() -> {
return
Arrays.stream(_coldata).parallel() // parallel columns
- .map(x ->
x.getInMemorySize()).reduce(0L, Long::sum);
+ .map(x
->x.getInMemorySize()).reduce(0L, (a,x) -> a + x);
}).get();
}
catch(InterruptedException | ExecutionException
e) {
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
index f57e303a49..ff6c5d3d5f 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
@@ -147,8 +147,8 @@ public abstract class Array<T> implements Writable {
/**
* Get the value at a given index.
*
- * This method returns objects that have a high overhead in allocation.
Therefore it is not as efficient as using the
- * vectorized operations specified in the object.
+ * This method returns objects that have a high overhead in allocation.
Therefore it is not as efficient as using
+ * the vectorized operations specified in the object.
*
* @param index The index to query
* @return The value returned as an object
@@ -168,8 +168,24 @@ public abstract class Array<T> implements Writable {
*/
public abstract Object get();
+ /**
+ * Get the index's value.
+ *
+ * returns 0 in case of Null.
+ *
+ * @param i index to get value from
+ * @return the value
+ */
public abstract double getAsDouble(int i);
+ /**
+ * Get the index's value.
+ *
+ * returns Double.NaN in case of Null.
+ *
+ * @param i index to get value from
+ * @return the value
+ */
public double getAsNaNDouble(int i) {
return getAsDouble(i);
}
@@ -644,7 +660,7 @@ public abstract class Array<T> implements Writable {
if(ddcSize < memSize)
return new
ArrayCompressionStatistics(memSizePerElement, //
- estDistinct, true,
getValueType(),FrameArrayType.DDC, memSize, ddcSize);
+ estDistinct, true, getValueType(),
FrameArrayType.DDC, memSize, ddcSize);
return null;
}
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
index 55d32f15be..92f4ee4c31 100644
---
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
+++
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
@@ -35,7 +35,7 @@ public interface ArrayFactory {
public final static int bitSetSwitchPoint = 64;
public enum FrameArrayType {
- STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64, CHARACTER,
OPTIONAL, DDC;
+ STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64, CHARACTER,
RAGGED, OPTIONAL, DDC;
}
public static StringArray create(String[] col) {
@@ -74,6 +74,10 @@ public interface ArrayFactory {
return new OptionalArray<>(col);
}
+ public static <T> RaggedArray<T> create(T[] col, int m) {
+ return new RaggedArray<T>(col, m);
+ }
+
public static long getInMemorySize(ValueType type, int _numRows,
boolean containsNull) {
if(containsNull) {
switch(type) {
@@ -216,6 +220,8 @@ public interface ArrayFactory {
case CHARACTER:
arr = new CharArray(new char[nRow]);
break;
+ case RAGGED:
+ return RaggedArray.readRagged(in, nRow);
case OPTIONAL:
return OptionalArray.readOpt(in, nRow);
case DDC:
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
index be7044e907..d87cf39666 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
@@ -185,7 +185,7 @@ public class CharArray extends Array<Character> {
@Override
public long getExactSerializedSize() {
- return 1 + 2 * _data.length;
+ return 1L + 2L * _data.length;
}
@Override
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
index 2405c22756..772b07af8b 100644
---
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
+++
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
@@ -459,9 +459,9 @@ public class OptionalArray<T> extends Array<T> {
@Override
public String toString() {
StringBuilder sb = new StringBuilder(_size + 2);
- sb.append(super.toString() + "<" + _a.getValueType() + ">:[");
+
sb.append(super.toString()).append("<").append(_a.getClass().getSimpleName()).append(">:[");
for(int i = 0; i < _size - 1; i++)
- sb.append(get(i) + ",");
+ sb.append(get(i)).append(",");
sb.append(get(_size - 1));
sb.append("]");
return sb.toString();
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
index b8f49679b2..a2745df32a 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
@@ -29,12 +29,11 @@ import
org.apache.sysds.runtime.frame.data.columns.ArrayFactory.FrameArrayType;
import org.apache.sysds.runtime.matrix.data.Pair;
/**
- * A Ragged array for the columns contains a smaller array, only containing
the values of the top most part of the
- * column.
+ * A Ragged array for a single column contains a smaller array, only
containing the values of the top most part of the
*
- * This makes the allocation much better in cases where only the top n rows of
a m row frame is used for the specific
+ * This makes the allocation much better in cases where only the top n rows of
a m row frame are used for the specific
* column. It is typically used for instances of transform encode, where the
transform encode return a metadata frame to
- * enable encoding and decoding the matrix
+ * enable encoding and decoding the matrix.
*/
public class RaggedArray<T> extends Array<T> {
@@ -49,97 +48,203 @@ public class RaggedArray<T> extends Array<T> {
*/
public RaggedArray(T[] a, int m) {
super(m);
- throw new NotImplementedException();
+ this._a = ArrayFactory.create(a);
+ }
+
+ /**
+ * The allocation where, a's length is shorter than m, and we handle
all accesses above len(a) as null.
+ *
+ * @param a The underlying array that is shorter than length m
+ * @param m The overall supported length m
+ */
+ public RaggedArray(Array<T> a, int m) {
+ super(m);
+ this._a = a;
+ }
+
+ protected Array<T> getInnerArray() {
+ return _a;
}
@Override
public void write(DataOutput out) throws IOException {
- throw new NotImplementedException("Unimplemented method
'write'");
+ out.writeByte(FrameArrayType.RAGGED.ordinal());
+ out.writeInt(_size);
+ out.writeInt(_a.size());
+ _a.write(out);
}
@Override
+ @SuppressWarnings("unchecked")
public void readFields(DataInput in) throws IOException {
- throw new NotImplementedException("Unimplemented method
'readFields'");
+ _size = in.readInt();
+ _a = (Array<T>) ArrayFactory.read(in, in.readInt());
+ }
+
+ protected static RaggedArray<?> readRagged(DataInput in, int nRow)
throws IOException {
+
+ int m = in.readInt();
+ final Array<?> a = ArrayFactory.read(in, in.readInt());
+ return new RaggedArray<>(a, m);
+
}
@Override
public T get(int index) {
- throw new NotImplementedException("Unimplemented method 'get'");
+ if(index > _size || index < 0)
+ throw new ArrayIndexOutOfBoundsException("Index " +
index + " out of bounds " + _size);
+ return index < _a._size ? _a.get(index) : null;
}
@Override
public Object get() {
- throw new NotImplementedException("Unimplemented method 'get'");
+ throw new NotImplementedException("Should not be called");
}
@Override
public double getAsDouble(int i) {
- throw new NotImplementedException("Unimplemented method
'getAsDouble'");
+ return i < _a._size ? _a.getAsDouble(i) : 0;
+ }
+
+ @Override
+ public double getAsNaNDouble(int i) {
+ return i < _a._size ? _a.getAsNaNDouble(i) : Double.NaN;
}
@Override
public void set(int index, T value) {
- throw new NotImplementedException("Unimplemented method 'set'");
+ if(index < _a._size)
+ _a.set(index, value);
+ else if(index < super.size()) {
+ _a.reset(index + 1);
+ _a.set(index, value);
+ LOG.warn("Reallocated ragged array");
+ }
}
@Override
public void set(int index, double value) {
- throw new NotImplementedException("Unimplemented method 'set'");
+ if(index < _a._size)
+ _a.set(index, value);
+ else if(index < super.size()) {
+ _a.reset(index + 1);
+ _a.set(index, value);
+ LOG.warn("Reallocated ragged array");
+ }
}
@Override
public void set(int index, String value) {
- throw new NotImplementedException("Unimplemented method 'set'");
+ if(index < _a._size)
+ _a.set(index, value);
+ else if(index < super.size()) {
+ _a.reset(index + 1);
+ _a.set(index, value);
+ LOG.warn("Reallocated ragged array");
+ }
}
@Override
public void setFromOtherType(int rl, int ru, Array<?> value) {
- throw new NotImplementedException("Unimplemented method
'setFromOtherType'");
+ if(rl >= 0 && rl < _a._size && ru < _a._size)
+ _a.setFromOtherType(rl, ru, value);
+ else
+ throw new NotImplementedException("Unimplemented method
'setFromOtherType'");
}
@Override
public void set(int rl, int ru, Array<T> value) {
- throw new NotImplementedException("Unimplemented method 'set'");
+ if(rl >= 0 && rl < _a._size && ru < _a._size)
+ if(value instanceof RaggedArray)
+ _a.set(rl, ru, ((RaggedArray<T>)
value).getInnerArray());
+ else if(_a.getClass() == value.getClass())
+ _a.set(rl, ru, value);
+ else
+ throw new RuntimeException(
+ "RaggedArray set: value type should be
same to RaggedArray type " + _a.getClass());
+ else if(rl >= 0 && rl < super.size() && ru < super.size()) {
+ _a.reset(rl + 1);
+ _a.set(rl, ru, value);
+ LOG.warn("Reallocated ragged array");
+ }
}
@Override
public void set(int rl, int ru, Array<T> value, int rlSrc) {
- throw new NotImplementedException("Unimplemented method 'set'");
+ if(rl >= 0 && rl < _a._size && ru < _a._size)
+ if(value instanceof RaggedArray)
+ _a.set(rl, ru, ((RaggedArray<T>)
value).getInnerArray(), rlSrc);
+ else if(_a.getClass() == value.getClass())
+ _a.set(rl, ru, value, rlSrc);
+ else
+ throw new RuntimeException(
+ "RaggedArray set: value type should be
same to RaggedArray type " + _a.getClass());
}
@Override
public void setNz(int rl, int ru, Array<T> value) {
- throw new NotImplementedException("Unimplemented method
'setNz'");
+ if(rl >= 0 && rl < _a._size && ru < _a._size)
+ _a.setNz(rl, ru, value);
+ else
+ throw new NotImplementedException();
}
@Override
public void setFromOtherTypeNz(int rl, int ru, Array<?> value) {
- throw new NotImplementedException("Unimplemented method
'setFromOtherTypeNz'");
+ if(rl >= 0 && rl < _a._size && ru < _a._size)
+ _a.setFromOtherTypeNz(rl, ru, value);
+ else
+ throw new NotImplementedException();
}
@Override
public void append(String value) {
- throw new NotImplementedException("Unimplemented method
'append'");
+ Array<T> oldVals = _a.clone();
+ _a.reset(super.size() + 1);
+ _a.set(0, oldVals.size() - 1, oldVals);
+ _a.set(super.size(), value);
+ super._size += 1;
+
+ LOG.warn("Fully allocated ragged array");
}
@Override
public void append(T value) {
- throw new NotImplementedException("Unimplemented method
'append'");
+ Array<T> oldVals = _a.clone();
+ _a.reset(super.size() + 1);
+ _a.set(0, oldVals.size() - 1, oldVals);
+ _a.set(super.size(), value);
+ super._size += 1;
+
+ LOG.warn("Fully allocated ragged array");
}
@Override
public Array<T> append(Array<T> other) {
- throw new NotImplementedException("Unimplemented method
'append'");
+ Array<T> oldVals = _a.clone();
+ _a.reset(super.size() + other._size + 1);
+ _a.set(0, oldVals.size() - 1, oldVals);
+ _a.set(super.size(), super.size() + other.size() - 1, other);
+ super._size += other.size();
+
+ LOG.warn("Fully allocated ragged array");
+
+ return this;
}
@Override
public Array<T> slice(int rl, int ru) {
- throw new NotImplementedException("Unimplemented method
'slice'");
+ if(rl >= 0 && rl < _a._size && ru < _a._size)
+ return _a.slice(rl, ru);
+ else if(rl >= 0 && ru >= _a._size)
+ return _a.slice(rl, _a._size - 1);
+ return null;
}
@Override
public void reset(int size) {
- throw new NotImplementedException("Unimplemented method
'reset'");
+ _a.reset(size);
+ super._size = size;
}
@Override
@@ -149,107 +254,130 @@ public class RaggedArray<T> extends Array<T> {
@Override
public ValueType getValueType() {
- throw new NotImplementedException("Unimplemented method
'getValueType'");
+ return _a.getValueType();
}
@Override
public Pair<ValueType, Boolean> analyzeValueType() {
- throw new NotImplementedException("Unimplemented method
'analyzeValueType'");
+ return _a.analyzeValueType();
}
@Override
public FrameArrayType getFrameArrayType() {
- throw new NotImplementedException("Unimplemented method
'getFrameArrayType'");
+ return FrameArrayType.RAGGED;
}
@Override
public long getExactSerializedSize() {
- throw new NotImplementedException("Unimplemented method
'getExactSerializedSize'");
+ return _a.getExactSerializedSize() + 8 + 1;
}
@Override
protected Array<Boolean> changeTypeBitSet() {
- throw new NotImplementedException("Unimplemented method
'changeTypeBitSet'");
+ return _a.changeTypeBitSet();
}
@Override
protected Array<Boolean> changeTypeBoolean() {
- throw new NotImplementedException("Unimplemented method
'changeTypeBoolean'");
+ return _a.changeTypeBoolean();
}
@Override
protected Array<Double> changeTypeDouble() {
- throw new NotImplementedException("Unimplemented method
'changeTypeDouble'");
+ return _a.changeTypeDouble();
}
@Override
protected Array<Float> changeTypeFloat() {
- throw new NotImplementedException("Unimplemented method
'changeTypeFloat'");
+ return _a.changeTypeFloat();
}
@Override
protected Array<Integer> changeTypeInteger() {
- throw new NotImplementedException("Unimplemented method
'changeTypeInteger'");
+ return _a.changeTypeInteger();
}
@Override
protected Array<Long> changeTypeLong() {
- throw new NotImplementedException("Unimplemented method
'changeTypeLong'");
+ return _a.changeTypeLong();
}
@Override
protected Array<String> changeTypeString() {
- throw new NotImplementedException("Unimplemented method
'changeTypeString'");
+ return _a.changeTypeString();
}
@Override
protected Array<Character> changeTypeCharacter() {
- throw new NotImplementedException("Unimplemented method
'changeTypeCharacter'");
+ return _a.changeTypeCharacter();
}
@Override
public void fill(String val) {
- throw new NotImplementedException("Unimplemented method
'fill'");
+ _a.reset(super.size());
+ _a.fill(val);
}
@Override
public void fill(T val) {
- throw new NotImplementedException("Unimplemented method
'fill'");
+ _a.reset(super.size());
+ _a.fill(val);
}
@Override
public boolean isShallowSerialize() {
- throw new NotImplementedException("Unimplemented method
'isShallowSerialize'");
+ return _a.isShallowSerialize();
}
@Override
public boolean isEmpty() {
- throw new NotImplementedException("Unimplemented method
'isEmpty'");
+ return _a.isEmpty();
}
@Override
+ @SuppressWarnings("unchecked")
public Array<T> select(int[] indices) {
- throw new NotImplementedException("Unimplemented method
'select'");
+ Array<T> ret = _a.getFrameArrayType() ==
FrameArrayType.OPTIONAL ? //
+ (Array<T>)
ArrayFactory.allocateOptional(_a.getValueType(), indices.length) : //
+ (Array<T>) ArrayFactory.allocate(_a.getValueType(),
indices.length);
+ for(int i = 0; i < indices.length; i++)
+ ret.set(i, get(indices[i]));
+ return ret;
}
@Override
+ @SuppressWarnings("unchecked")
public Array<T> select(boolean[] select, int nTrue) {
- throw new NotImplementedException("Unimplemented method
'select'");
+ Array<T> ret = _a.getFrameArrayType() ==
FrameArrayType.OPTIONAL ? //
+ (Array<T>)
ArrayFactory.allocateOptional(_a.getValueType(), nTrue) : //
+ (Array<T>) ArrayFactory.allocate(_a.getValueType(),
nTrue);
+ int k = 0;
+ for(int i = 0; i < _a.size(); i++) {
+ if(select[i])
+ ret.set(k++, _a.get(i));
+ }
+
+ for(int i = _a.size(); i < select.length; i++) {
+ if(select[i])
+ ret.set(k++, get(i));
+ }
+
+ return ret;
}
@Override
public boolean isNotEmpty(int i) {
- throw new NotImplementedException("Unimplemented method
'isNotEmpty'");
+ return i < _a.size() && _a.isNotEmpty(i);
}
@Override
public Array<T> clone() {
- throw new NotImplementedException("Unimplemented method
'clone'");
+ return new RaggedArray<>(_a.clone(), super._size);
}
@Override
public double hashDouble(int idx) {
- throw new NotImplementedException("Unimplemented method
'hashDouble'");
+ return idx < _a.size() ? _a.hashDouble(idx) : Double.NaN;
}
@Override
@@ -257,4 +385,26 @@ public class RaggedArray<T> extends Array<T> {
throw new NotImplementedException("Unimplemented method
'equals'");
}
+ @Override
+ public long getInMemorySize() {
+ return baseMemoryCost() + _a.getInMemorySize() + 8;
+ }
+
+ @Override
+ public boolean containsNull() {
+ return (_a.size() < super._size) || _a.containsNull();
+ }
+
+ @Override
+ public String toString() {
+ StringBuilder sb = new StringBuilder(_size + 2);
+ sb.append(super.toString()).append("<");
+ sb.append(_a.getClass().getSimpleName()).append(">:[");
+ for(int i = 0; i < _size - 1; i++)
+ sb.append(get(i)).append(",");
+ sb.append(get(_size - 1));
+ sb.append("]");
+ return sb.toString();
+ }
+
}
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
index 8eddc37707..e24815aeba 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
@@ -302,8 +302,9 @@ public class StringArray extends Array<String> {
firstNN = _data[i++];
}
- // detect type of transform.
- if(i == size()) // if all null return empty boolean.
+ if(firstNN == null)
+ // this check is similar to saying i == size();
+ // this means all values were null. therefore we have
an easy time retuning an empty boolean array.
return ArrayFactory.allocateBoolean(size());
else if(firstNN.toLowerCase().equals("true") ||
firstNN.toLowerCase().equals("false"))
return changeTypeBooleanStandard();
diff --git
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
index 48f7f0bd2d..c712789ab7 100644
---
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
+++
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
@@ -52,6 +52,7 @@ import org.apache.sysds.runtime.frame.data.columns.FloatArray;
import org.apache.sysds.runtime.frame.data.columns.IntegerArray;
import org.apache.sysds.runtime.frame.data.columns.LongArray;
import org.apache.sysds.runtime.frame.data.columns.OptionalArray;
+import org.apache.sysds.runtime.frame.data.columns.RaggedArray;
import org.apache.sysds.runtime.frame.data.columns.StringArray;
import org.apache.sysds.runtime.frame.data.lib.FrameLibRemoveEmpty;
import org.apache.sysds.runtime.matrix.data.Pair;
@@ -100,13 +101,17 @@ public class FrameArrayTests {
tests.add(new Object[] {ArrayFactory.create(new
String[] {"1", "0", "1"}), FrameArrayType.STRING});
tests.add(new Object[] {ArrayFactory.create(new
String[] {"1", "0", "null"}), FrameArrayType.STRING});
tests.add(new Object[] {ArrayFactory.create(new
String[] {"0", "0", "null"}), FrameArrayType.STRING});
- tests.add(new Object[] {ArrayFactory.create(new
String[] {"true", "false", "false"}), FrameArrayType.STRING});
- tests.add(new Object[] {ArrayFactory.create(new
String[] {"True", "False", "False"}), FrameArrayType.STRING});
- tests.add(new Object[] {ArrayFactory.create(new
String[] {"False", "False", "False"}), FrameArrayType.STRING});
+ tests.add(
+ new Object[] {ArrayFactory.create(new String[]
{"true", "false", "false"}), FrameArrayType.STRING});
+ tests.add(
+ new Object[] {ArrayFactory.create(new String[]
{"True", "False", "False"}), FrameArrayType.STRING});
+ tests.add(
+ new Object[] {ArrayFactory.create(new String[]
{"False", "False", "False"}), FrameArrayType.STRING});
tests.add(new Object[] {ArrayFactory.create(new
String[] {"T", "F", "F"}), FrameArrayType.STRING});
tests.add(new Object[] {ArrayFactory.create(new
String[] {"t", "f", "f"}), FrameArrayType.STRING});
tests.add(new Object[] {ArrayFactory.create(new
String[] {"f", "t", "t"}), FrameArrayType.STRING});
- tests.add(new Object[] {ArrayFactory.create(new
String[] {"true", "false", "BLAA"}), FrameArrayType.STRING});
+ tests
+ .add(new Object[] {ArrayFactory.create(new
String[] {"true", "false", "BLAA"}), FrameArrayType.STRING});
tests.add(new Object[] {ArrayFactory.create(new float[]
{0.0f, 1.0f, 1.0f, 0.0f}), FrameArrayType.FP32});
tests.add(new Object[] {ArrayFactory.create(new
double[] {0.0, 1.0, 1.0, 0.0}), FrameArrayType.FP64});
tests.add(new Object[] {ArrayFactory.create(new long[]
{0, 1, 1, 0, 0, 1}), FrameArrayType.INT64});
@@ -114,9 +119,12 @@ public class FrameArrayTests {
tests.add(new Object[]
{ArrayFactory.create(generateRandom01String(100, 324)), FrameArrayType.STRING});
tests.add(new Object[]
{ArrayFactory.create(generateRandom01String(80, 22)), FrameArrayType.STRING});
tests.add(new Object[]
{ArrayFactory.create(generateRandom01String(32, 221)), FrameArrayType.STRING});
- tests.add(new Object[]
{ArrayFactory.create(generateRandomTrueFalseString(32, 221)),
FrameArrayType.STRING});
- tests.add(new Object[]
{ArrayFactory.create(generateRandomTrueFalseString(80, 221)),
FrameArrayType.STRING});
- tests.add(new Object[]
{ArrayFactory.create(generateRandomTrueFalseString(150, 221)),
FrameArrayType.STRING});
+ tests
+ .add(new Object[]
{ArrayFactory.create(generateRandomTrueFalseString(32, 221)),
FrameArrayType.STRING});
+ tests
+ .add(new Object[]
{ArrayFactory.create(generateRandomTrueFalseString(80, 221)),
FrameArrayType.STRING});
+ tests.add(
+ new Object[]
{ArrayFactory.create(generateRandomTrueFalseString(150, 221)),
FrameArrayType.STRING});
tests.add(new Object[]
{ArrayFactory.create(generateRandomTFString(150, 221)), FrameArrayType.STRING});
tests.add(new Object[]
{ArrayFactory.create(generateRandomTFString(22, 2)), FrameArrayType.STRING});
tests.add(new Object[]
{ArrayFactory.create(generateRandomTFString(142, 4)), FrameArrayType.STRING});
@@ -129,8 +137,10 @@ public class FrameArrayTests {
tests.add(new Object[]
{ArrayFactory.create(generateRandomNullFloatString(67, 21)),
FrameArrayType.STRING});
tests.add(new Object[] {ArrayFactory.create(new
String[30]), FrameArrayType.STRING}); // all null
tests.add(new Object[] {ArrayFactory.create(new char[]
{0, 0, 0, 0, 1, 1, 1}), FrameArrayType.CHARACTER});
- tests.add(new Object[] {ArrayFactory.create(new char[]
{'t', 't', 'f', 'f', 'T'}), FrameArrayType.CHARACTER});
- tests.add(new Object[] {ArrayFactory.create(new char[]
{'0', '2', '3', '4', '9'}), FrameArrayType.CHARACTER});
+ tests.add(
+ new Object[] {ArrayFactory.create(new char[]
{'t', 't', 'f', 'f', 'T'}), FrameArrayType.CHARACTER});
+ tests.add(
+ new Object[] {ArrayFactory.create(new char[]
{'0', '2', '3', '4', '9'}), FrameArrayType.CHARACTER});
tests.add(new Object[]
{ArrayFactory.create(generateRandom01chars(150, 221)),
FrameArrayType.CHARACTER});
tests.add(new Object[]
{ArrayFactory.create(generateRandom01chars(67, 221)),
FrameArrayType.CHARACTER});
tests.add(new Object[]
{DDCArray.compressToDDC(ArrayFactory.create(generateRandom01chars(67, 221))),
@@ -138,7 +148,8 @@ public class FrameArrayTests {
tests.add(new Object[]
{DDCArray.compressToDDC(ArrayFactory.create(generateRandom01chars(30, 221))),
FrameArrayType.CHARACTER});
// Long to int
- tests.add(new Object[] {ArrayFactory.create(new long[]
{3214, 424, 13, 22, 111, 134}), FrameArrayType.INT64});
+ tests.add(
+ new Object[] {ArrayFactory.create(new long[]
{3214, 424, 13, 22, 111, 134}), FrameArrayType.INT64});
tests.add(new Object[] {ArrayFactory.create(new
double[] {//
Double.NaN, 424, 13, Double.NEGATIVE_INFINITY,
Double.POSITIVE_INFINITY, 134}), FrameArrayType.FP64});
@@ -228,10 +239,10 @@ public class FrameArrayTests {
case BOOLEAN:
if(a instanceof BooleanArray) // just in case
we overwrite the BitSet to boolean Array type.
estSize =
BooleanArray.estimateInMemorySize(a.size());
- break;
+ break;
default: // nothing
}
- if(a.getFrameArrayType() == FrameArrayType.DDC)
+ if(a.getFrameArrayType() == FrameArrayType.DDC ||
a.getFrameArrayType() == FrameArrayType.RAGGED)
return;
if(memSize > estSize)
fail("Estimated size is not smaller than actual:" +
memSize + " " + estSize + "\n" + a.getValueType() + " "
@@ -304,6 +315,8 @@ public class FrameArrayTests {
return;
if(t == FrameArrayType.DDC)// can be many things.
return;
+ if(t == FrameArrayType.OPTIONAL && a.getFrameArrayType() ==
FrameArrayType.RAGGED)
+ return;
if(a.getFrameArrayType() == FrameArrayType.DDC)
return; // can happen where DDC is wrapping Optional.
@@ -354,6 +367,7 @@ public class FrameArrayTests {
case CHARACTER:
x = a.get();
break;
+ case RAGGED:
case OPTIONAL:
try {
a.get();
@@ -715,7 +729,7 @@ public class FrameArrayTests {
// just test that it serialize as byte array with no
crashes
a.getAsByteArray();
}
- catch(DMLCompressionException e) {
+ catch(DMLCompressionException | NotImplementedException e) {
return; // valid
}
}
@@ -823,7 +837,7 @@ public class FrameArrayTests {
try {
aa.append((String) null);
- if(a.getFrameArrayType() == FrameArrayType.OPTIONAL)
+ if(a.getFrameArrayType() == FrameArrayType.OPTIONAL ||
a.getFrameArrayType() == FrameArrayType.RAGGED)
assertEquals(aa.get(aa.size() - 1), null);
else {
switch(a.getValueType()) {
@@ -870,7 +884,7 @@ public class FrameArrayTests {
for(int i = 0; i < 60; i++)
aa.append((String) null);
- if(a.getFrameArrayType() == FrameArrayType.OPTIONAL)
+ if(a.getFrameArrayType() == FrameArrayType.OPTIONAL ||
a.getFrameArrayType() == FrameArrayType.RAGGED)
assertEquals(aa.get(aa.size() - 1), null);
else {
switch(a.getValueType()) {
@@ -1044,7 +1058,9 @@ public class FrameArrayTests {
Array<?> aa = a.clone();
aa.reset(10);
- if(aa.getValueType() == ValueType.STRING ||
aa.getFrameArrayType() == FrameArrayType.OPTIONAL) {
+ if(aa.getValueType() == ValueType.STRING //
+ || aa.getFrameArrayType() ==
FrameArrayType.OPTIONAL //
+ || aa.getFrameArrayType() ==
FrameArrayType.RAGGED) {
for(int i = 0; i < 10; i++) {
assertEquals(null, aa.get(i));
}
@@ -1268,7 +1284,7 @@ public class FrameArrayTests {
@SuppressWarnings("unchecked")
public void testAppendValue() {
Array<?> aa = a.clone();
- boolean isOptional = aa instanceof OptionalArray;
+ boolean isOptional = aa instanceof OptionalArray || aa
instanceof RaggedArray;
try {
switch(a.getValueType()) {
@@ -1389,7 +1405,7 @@ public class FrameArrayTests {
@Test
public void fillNull() {
Array<?> aa = a.clone();
- boolean isOptional = aa instanceof OptionalArray;
+ boolean isOptional = aa instanceof OptionalArray || aa
instanceof RaggedArray;
try {
aa.fill((String) null);
@@ -1551,7 +1567,7 @@ public class FrameArrayTests {
try {
Array<?> aa = a.clone();
- boolean isOptional = aa instanceof OptionalArray;
+ boolean isOptional = aa instanceof OptionalArray || aa
instanceof RaggedArray;
switch(a.getValueType()) {
case BOOLEAN:
((Array<Boolean>) aa).fill((Boolean)
null);
@@ -1759,7 +1775,8 @@ public class FrameArrayTests {
return DDCArray
.compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size,
seed, nUnique)));
case INT64:
- return
DDCArray.compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray
+
.compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size,
seed, nUnique)));
case FP32:
return DDCArray
.compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size,
seed, nUnique)));
@@ -1773,47 +1790,64 @@ public class FrameArrayTests {
Random r = new Random(seed);
switch(r.nextInt(7)) {
case 0:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed,
nUnique)));
case 1:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique)));
case 2:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique)));
case 3:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique)));
case 4:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed,
nUnique)));
default:
return
DDCArray.compressToDDC(ArrayFactory.create(generateRandomBooleanOpt(size,
seed)));
}
+ case RAGGED:
+ Random rand = new Random(seed);
+ switch(rand.nextInt(7)) {
+ case 0:
+ return
ArrayFactory.create(generateRandomIntegerOpt(size, seed), size);
+ case 1:
+ return
ArrayFactory.create(generateRandomLongOpt(size, seed), size);
+ case 2:
+ return
ArrayFactory.create(generateRandomDoubleOpt(size, seed), size);
+ case 3:
+ return
ArrayFactory.create(generateRandomFloatOpt(size, seed), size);
+ case 4:
+ return
ArrayFactory.create(generateRandomCharacterOpt(size, seed), size);
+ default:
+ return
ArrayFactory.create(generateRandomBooleanOpt(size, seed), size);
+ }
case DDC:
Random r2 = new Random(seed);
switch(r2.nextInt(7)) {
case 0:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed,
nUnique)));
case 1:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique)));
case 2:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique)));
case 3:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique)));
case 4:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed,
nUnique)));
case 5:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size,
seed, nUnique, 32)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size, seed, nUnique,
32)));
default:
return
DDCArray.compressToDDC(ArrayFactory.create(generateRandomBooleanOpt(size,
seed)));
}
+
default:
throw new DMLRuntimeException("Unsupported
value type: " + t);
@@ -1838,6 +1872,7 @@ public class FrameArrayTests {
case CHARACTER:
return
ArrayFactory.create(generateRandomCharacterOpt(size, seed));
case OPTIONAL:
+ case RAGGED: // let's not test this case separately here; it shares the OPTIONAL branch below.
Random r = new Random(seed);
switch(r.nextInt(7)) {
case 0:
@@ -1858,23 +1893,23 @@ public class FrameArrayTests {
int nUnique = Math.max(size / 100, 2);
switch(r2.nextInt(7)) {
case 0:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed,
nUnique)));
case 1:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique)));
case 2:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique)));
case 3:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique)));
case 4:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed,
nUnique)));
case 5:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size,
seed, nUnique, 32)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size, seed, nUnique,
32)));
default:
return
DDCArray.compressToDDC(ArrayFactory.create(generateRandomBooleanOpt(size,
seed)));
}
@@ -1902,6 +1937,24 @@ public class FrameArrayTests {
return
ArrayFactory.create(generateRandomDouble(size, seed));
case CHARACTER:
return
ArrayFactory.create(generateRandomChar(size, seed));
+ case RAGGED:
+ Random rand = new Random(seed);
+ switch(rand.nextInt(7)) {
+ case 0:
+ return
ArrayFactory.create(generateRandomIntegerOpt(size, seed), size);
+ case 1:
+ return
ArrayFactory.create(generateRandomLongOpt(size, seed), size);
+ case 2:
+ return
ArrayFactory.create(generateRandomDoubleOpt(size, seed), size);
+ case 3:
+ return
ArrayFactory.create(generateRandomFloatOpt(size, seed), size);
+ case 4:
+ return
ArrayFactory.create(generateRandomCharacterOpt(size, seed), size);
+ case 5:
+ return
ArrayFactory.create(generateRandomString(size, seed), size);
+ default:
+ return
ArrayFactory.create(generateRandomBooleanOpt(size, seed), size);
+ }
case OPTIONAL:
Random r = new Random(seed);
switch(r.nextInt(7)) {
@@ -1923,23 +1976,23 @@ public class FrameArrayTests {
int nUnique = Math.max(size / 100, 2);
switch(r2.nextInt(7)) {
case 0:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomIntegerNUniqueLengthOpt(size, seed,
nUnique)));
case 1:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomLongNUniqueLengthOpt(size, seed, nUnique)));
case 2:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomDoubleNUniqueLengthOpt(size, seed, nUnique)));
case 3:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomFloatNUniqueLengthOpt(size, seed, nUnique)));
case 4:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size,
seed, nUnique)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size, seed,
nUnique)));
case 5:
- return DDCArray
-
.compressToDDC(ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size,
seed, nUnique, 32)));
+ return DDCArray.compressToDDC(
+
ArrayFactory.create(generateRandomStringNUniqueLengthOpt(size, seed, nUnique,
32)));
default:
return
DDCArray.compressToDDC(ArrayFactory.create(generateRandomBooleanOpt(size,
seed)));
}