This is an automated email from the ASF dual-hosted git repository.
baunsgaard pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new 7561f61a14 [SYSTEMDS-3640] Hash Column
7561f61a14 is described below
commit 7561f61a14dc1097e3bfcfee497a90451b4564f1
Author: Sebastian Baunsgaard <[email protected]>
AuthorDate: Wed Oct 25 10:38:02 2023 +0200
[SYSTEMDS-3640] Hash Column
This commit adds a new value type HASH64 that can contain hashes
of 16 hex-encoded characters. It behaves internally as if it were a string
column, but allocates a single long value per cell.
This reduces the allocation of columns with hash values from 40+ bytes per
value to 8 bytes.
Closes #1933
---
src/main/java/org/apache/sysds/common/Types.java | 17 +-
.../sysds/runtime/compress/colgroup/APreAgg.java | 2 +-
.../sysds/runtime/compress/lib/CLALibScalar.java | 2 +-
.../sysds/runtime/frame/data/columns/Array.java | 11 ++
.../runtime/frame/data/columns/ArrayFactory.java | 33 +++-
.../runtime/frame/data/columns/BitSetArray.java | 8 +
.../runtime/frame/data/columns/BooleanArray.java | 8 +
.../runtime/frame/data/columns/CharArray.java | 8 +
.../sysds/runtime/frame/data/columns/DDCArray.java | 5 +
.../runtime/frame/data/columns/DoubleArray.java | 11 ++
.../runtime/frame/data/columns/FloatArray.java | 8 +
.../columns/{LongArray.java => HashLongArray.java} | 213 +++++++++++++--------
.../runtime/frame/data/columns/IntegerArray.java | 8 +
.../runtime/frame/data/columns/LongArray.java | 5 +
.../runtime/frame/data/columns/OptionalArray.java | 17 ++
.../runtime/frame/data/columns/RaggedArray.java | 5 +
.../runtime/frame/data/columns/StringArray.java | 31 ++-
.../frame/data/lib/FrameLibApplySchema.java | 1 +
.../sysds/runtime/frame/data/lib/FrameUtil.java | 20 +-
.../apache/sysds/runtime/util/UtilFunctions.java | 16 +-
src/test/java/org/apache/sysds/test/TestUtils.java | 1 +
.../component/frame/array/CustomArrayTests.java | 55 +++++-
.../frame/array/FrameArrayConstantTests.java | 2 +
.../component/frame/array/FrameArrayTests.java | 159 +++++++++++++--
.../component/frame/iterators/IteratorTest.java | 37 ++--
25 files changed, 549 insertions(+), 134 deletions(-)
diff --git a/src/main/java/org/apache/sysds/common/Types.java
b/src/main/java/org/apache/sysds/common/Types.java
index 4b8f1c3a00..84019e8078 100644
--- a/src/main/java/org/apache/sysds/common/Types.java
+++ b/src/main/java/org/apache/sysds/common/Types.java
@@ -77,17 +77,21 @@ public class Types
public enum ValueType {
UINT4, UINT8, // Used for parsing in UINT values from numpy.
FP32, FP64, INT32, INT64, BOOLEAN, STRING, UNKNOWN,
+ HASH64, // Indicate that the value is a hash of 64 bit.
CHARACTER;
public boolean isNumeric() {
return this == UINT8 || this == INT32 || this == INT64
|| this == FP32 || this == FP64 || this== UINT4;
}
+
public boolean isUnknown() {
return this == UNKNOWN;
}
+
public boolean isPseudoNumeric() {
return isNumeric() || this == BOOLEAN || this ==
CHARACTER;
}
+
public String toExternalString() {
switch(this) {
case FP32:
@@ -100,10 +104,13 @@ public class Types
default: return toString();
}
}
+
public static ValueType fromExternalString(String value) {
//for now we support both internal and external strings
//until we have completely changed the external types
- String lValue = (value != null) ? value.toUpperCase() :
null;
+ if(value == null)
+ throw new DMLRuntimeException("Unknown null
value type");
+ final String lValue = value.toUpperCase();
switch(lValue) {
case "FP32": return FP32;
case "FP64":
@@ -117,6 +124,7 @@ public class Types
case "STRING": return STRING;
case "CHARACTER": return CHARACTER;
case "UNKNOWN": return UNKNOWN;
+ case "HASH64": return HASH64;
default:
throw new DMLRuntimeException("Unknown
value type: "+value);
}
@@ -143,6 +151,13 @@ public class Types
switch(a){
case CHARACTER:
return STRING;
+ case HASH64:
+ switch(b){
+ case STRING:
+ return b;
+ default:
+ return a;
+ }
case STRING:
return a;
case FP64:
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java
b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java
index 655bfc496f..17f210865b 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/colgroup/APreAgg.java
@@ -154,7 +154,7 @@ public abstract class APreAgg extends AColGroupValue {
final boolean left = shouldPreAggregateLeft(lg);
if(!loggedWarningForDirect && shouldDirectMultiply(lg,
leftIdx.size(), rightIdx.size(), left)) {
loggedWarningForDirect = true;
- LOG.warn("Not implemented direct tsmm
colgroup");
+ LOG.warn("Not implemented direct tsmm colgroup:
" + lg.getClass().getSimpleName() + " %*% " + this.getClass().getSimpleName()
);
}
if(left) {
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java
b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java
index 0da3f2d969..3dea7f577a 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/lib/CLALibScalar.java
@@ -58,7 +58,7 @@ public final class CLALibScalar {
public static MatrixBlock scalarOperations(ScalarOperator sop,
CompressedMatrixBlock m1, MatrixValue result) {
if(isInvalidForCompressedOutput(m1, sop)) {
- LOG.warn("scalar overlapping not supported for op: " +
sop.fn);
+ LOG.warn("scalar overlapping not supported for op: " +
sop.fn.getClass().getSimpleName());
MatrixBlock m1d = m1.decompress(sop.getNumThreads());
return m1d.scalarOperations(sop, result);
}
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
index 874364255f..11accc814b 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/Array.java
@@ -423,6 +423,8 @@ public abstract class Array<T> implements Writable {
case UINT4:
case UINT8:
throw new NotImplementedException();
+ case HASH64:
+ return new OptionalArray<>(changeTypeHash64(),
nulls);
case INT32:
return new OptionalArray<>(changeTypeInteger(),
nulls);
case INT64:
@@ -457,6 +459,8 @@ public abstract class Array<T> implements Writable {
case UINT4:
case UINT8:
throw new NotImplementedException();
+ case HASH64:
+ return changeTypeHash64();
case INT32:
return changeTypeInteger();
case INT64:
@@ -513,6 +517,13 @@ public abstract class Array<T> implements Writable {
*/
protected abstract Array<Long> changeTypeLong();
+ /**
+ * Change type to a Hash64 array type
+ *
+ * @return A Hash64 array
+ */
+ protected abstract Array<Object> changeTypeHash64();
+
/**
* Change type to a String array type
*
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
index 12ca401c6b..2fd6a74837 100644
---
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
+++
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
@@ -23,6 +23,7 @@ import java.io.DataInput;
import java.io.IOException;
import java.util.BitSet;
+import org.apache.commons.lang3.NotImplementedException;
import org.apache.commons.logging.Log;
import org.apache.commons.logging.LogFactory;
import org.apache.sysds.common.Types.ValueType;
@@ -35,13 +36,27 @@ public interface ArrayFactory {
public final static int bitSetSwitchPoint = 64;
public enum FrameArrayType {
- STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64, CHARACTER,
RAGGED, OPTIONAL, DDC;
+ STRING, BOOLEAN, BITSET, INT32, INT64, FP32, FP64,
+ CHARACTER, RAGGED, OPTIONAL, DDC,
+ HASH64;
}
public static StringArray create(String[] col) {
return new StringArray(col);
}
+ public static HashLongArray createHash64(String[] col){
+ return new HashLongArray(col);
+ }
+
+ public static OptionalArray<Object> createHash64Opt(String[] col){
+ return new OptionalArray<Object>(col, ValueType.HASH64);
+ }
+
+ public static HashLongArray createHash64(long[] col){
+ return new HashLongArray(col);
+ }
+
public static BooleanArray create(boolean[] col) {
return new BooleanArray(col);
}
@@ -81,6 +96,8 @@ public interface ArrayFactory {
public static long getInMemorySize(ValueType type, int _numRows,
boolean containsNull) {
if(containsNull) {
switch(type) {
+ case HASH64:
+ type = ValueType.INT64;
case BOOLEAN:
case INT64:
case FP64:
@@ -108,6 +125,7 @@ public interface ArrayFactory {
else
return
BooleanArray.estimateInMemorySize(_numRows);
case INT64:
+ case HASH64:
return Array.baseMemoryCost() + (long)
MemoryEstimates.longArrayCost(_numRows);
case FP64:
return Array.baseMemoryCost() + (long)
MemoryEstimates.doubleArrayCost(_numRows);
@@ -154,6 +172,8 @@ public interface ArrayFactory {
return new OptionalArray<>(new DoubleArray(new
double[nRow]), true);
case CHARACTER:
return new OptionalArray<>(new CharArray(new
char[nRow]), true);
+ case HASH64:
+ return new OptionalArray<>(new
HashLongArray(new long[nRow]), true);
case UNKNOWN:
case STRING:
default:
@@ -184,6 +204,8 @@ public interface ArrayFactory {
return new DoubleArray(new double[nRow]);
case CHARACTER:
return new CharArray(new char[nRow]);
+ case HASH64:
+ return new HashLongArray(new long[nRow]);
case UNKNOWN:
case STRING:
default:
@@ -222,9 +244,14 @@ public interface ArrayFactory {
return OptionalArray.readOpt(in, nRow);
case DDC:
return DDCArray.read(in);
- default: // String
+ case STRING:
arr = new StringArray(new String[nRow]);
break;
+ case HASH64:
+ arr = new HashLongArray(new long[nRow]);
+ break;
+ default:
+ throw new NotImplementedException(v + "");
}
arr.readFields(in);
return arr;
@@ -325,6 +352,8 @@ public interface ArrayFactory {
return IntegerArray.parseInt(s);
case INT64:
return LongArray.parseLong(s);
+ case HASH64:
+ return HashLongArray.parseHashLong(s);
case STRING:
case UNKNOWN:
default:
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/BitSetArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/BitSetArray.java
index dbd5d7328c..710d8a8deb 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/BitSetArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/BitSetArray.java
@@ -465,6 +465,14 @@ public class BitSetArray extends ABooleanArray {
return new LongArray(ret);
}
+ @Override
+ protected Array<Object> changeTypeHash64(){
+ long[] ret = new long[size()];
+ for(int i = 0; i < size(); i++)
+ ret[i] = get(i) ? 1L : 0L;
+ return new HashLongArray(ret);
+ }
+
@Override
protected Array<String> changeTypeString() {
String[] ret = new String[size()];
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/BooleanArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/BooleanArray.java
index da874555d3..b44845bc34 100644
---
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/BooleanArray.java
+++
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/BooleanArray.java
@@ -265,6 +265,14 @@ public class BooleanArray extends ABooleanArray {
return new LongArray(ret);
}
+ @Override
+ protected Array<Object> changeTypeHash64(){
+ long[] ret = new long[size()];
+ for(int i = 0; i < size(); i++)
+ ret[i] = _data[i] ? 1L : 0L;
+ return new HashLongArray(ret);
+ }
+
@Override
protected Array<String> changeTypeString() {
String[] ret = new String[size()];
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
index 9862974ad7..14fcfd9f69 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/CharArray.java
@@ -253,6 +253,14 @@ public class CharArray extends Array<Character> {
return new LongArray(ret);
}
+ @Override
+ protected Array<Object> changeTypeHash64(){
+ long[] ret = new long[size()];
+ for(int i = 0; i < size(); i++)
+ ret[i] = _data[i];
+ return new HashLongArray(ret);
+ }
+
@Override
protected Array<String> changeTypeString() {
String[] ret = new String[size()];
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
index 4ddc3e4367..b634cfe6ff 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DDCArray.java
@@ -231,6 +231,11 @@ public class DDCArray<T> extends ACompressedArray<T> {
return new DDCArray<>(dict.changeTypeLong(), map);
}
+ @Override
+ protected Array<Object> changeTypeHash64(){
+ return new DDCArray<>(dict.changeTypeHash64(), map);
+ }
+
@Override
protected Array<String> changeTypeString() {
return new DDCArray<>(dict.changeTypeString(), map);
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DoubleArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DoubleArray.java
index 754748a28b..e4e1a76b6a 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/DoubleArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/DoubleArray.java
@@ -312,6 +312,17 @@ public class DoubleArray extends Array<Double> {
return new LongArray(ret);
}
+ @Override
+ protected Array<Object> changeTypeHash64() {
+ long[] ret = new long[size()];
+ for(int i = 0; i < size(); i++) {
+ if(_data[i] != (long) _data[i])
+ throw new DMLRuntimeException("Unable to change
to Long from Double array because of value:" + _data[i]);
+ ret[i] = (long) _data[i];
+ }
+ return new HashLongArray(ret);
+ }
+
@Override
protected Array<String> changeTypeString() {
String[] ret = new String[size()];
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/FloatArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/FloatArray.java
index 51d29b167d..47627894d9 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/FloatArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/FloatArray.java
@@ -253,6 +253,14 @@ public class FloatArray extends Array<Float> {
return new LongArray(ret);
}
+ @Override
+ protected Array<Object> changeTypeHash64() {
+ long[] ret = new long[size()];
+ for(int i = 0; i < size(); i++)
+ ret[i] = (int) _data[i];
+ return new HashLongArray(ret);
+ }
+
@Override
protected Array<Float> changeTypeFloat() {
return this;
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/HashLongArray.java
similarity index 58%
copy from
src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
copy to
src/main/java/org/apache/sysds/runtime/frame/data/columns/HashLongArray.java
index c1e0fe06c9..506c5d435f 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
+++
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/HashLongArray.java
@@ -22,119 +22,155 @@ package org.apache.sysds.runtime.frame.data.columns;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.nio.ByteBuffer;
-import java.nio.ByteOrder;
import java.util.Arrays;
import java.util.BitSet;
+import org.apache.commons.lang3.NotImplementedException;
import org.apache.sysds.common.Types.ValueType;
import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.frame.data.columns.ArrayFactory.FrameArrayType;
import org.apache.sysds.runtime.matrix.data.Pair;
-import org.apache.sysds.runtime.util.UtilFunctions;
import org.apache.sysds.utils.MemoryEstimates;
-public class LongArray extends Array<Long> {
+public class HashLongArray extends Array<Object> {
private long[] _data;
- public LongArray(long[] data) {
+ public HashLongArray(long[] data) {
super(data.length);
_data = data;
}
- public long[] get() {
- return _data;
+ public HashLongArray(String[] data) {
+ super(data.length);
+ _data = new long[data.length];
+ for(int i = 0; i < data.length; i++) {
+ _data[i] = parseHashLong(data[i]);
+ }
}
@Override
- public Long get(int index) {
- return _data[index];
+ public Object get() {
+ throw new NotImplementedException("Invalid to get underlying
array in Hash");
}
@Override
- public void set(int index, Long value) {
- _data[index] = (value != null) ? value : 0L;
+ public Object get(int index) {
+ return Long.toHexString(_data[index]);
+ }
+
+ public long getLong(int index) {
+ return _data[index];
}
@Override
- public void set(int index, double value) {
- _data[index] = (long) value;
+ public void set(int index, Object value) {
+ if(value instanceof String)
+ _data[index] = parseHashLong((String) value);
+ else if(value instanceof Long)
+ _data[index] = (Long) value;
+ else if (value == null)
+ _data[index] = 0L;
+ else
+ throw new NotImplementedException("not supported : " +
value);
}
@Override
public void set(int index, String value) {
- set(index, parseLong(value));
+ _data[index] = parseHashLong((String) value);
}
@Override
- public void set(int rl, int ru, Array<Long> value) {
+ public void set(int index, double value) {
+ _data[index] = (long) value;
+ }
+
+ @Override
+ public void set(int rl, int ru, Array<Object> value) {
set(rl, ru, value, 0);
}
@Override
public void setFromOtherType(int rl, int ru, Array<?> value) {
- final ValueType vt = value.getValueType();
for(int i = rl; i <= ru; i++)
- _data[i] = UtilFunctions.objectToLong(vt, value.get(i));
+ _data[i] = parseHashLong(value.get(i));
}
@Override
- public void set(int rl, int ru, Array<Long> value, int rlSrc) {
- try {
- // try system array copy.
- // but if it does not work, default to get.
- System.arraycopy(value.get(), rlSrc, _data, rl, ru - rl
+ 1);
+ public void setNz(int rl, int ru, Array<Object> value) {
+ if(value instanceof HashLongArray) {
+ long[] thatVals = ((HashLongArray) value)._data;
+ for(int i = rl; i <= ru; i++)
+ if(thatVals[i] != 0)
+ _data[i] = thatVals[i];
}
- catch(Exception e) {
- super.set(rl, ru, value, rlSrc);
+ else {
+ throw new NotImplementedException("Not supported type
of array: " + value.getClass().getSimpleName());
}
}
@Override
- public void setNz(int rl, int ru, Array<Long> value) {
- long[] data2 = ((LongArray) value)._data;
- for(int i = rl; i <= ru; i++)
- if(data2[i] != 0)
- _data[i] = data2[i];
+ public void setFromOtherTypeNz(int rl, int ru, Array<?> value) {
+ if(value instanceof HashLongArray)
+ setNz(rl, ru, (HashLongArray) value);
+ else if(value instanceof StringArray) {
+ StringArray st = ((StringArray) value);
+ for(int i = rl; i <= ru; i++)
+ if(st.get(i) != null)
+ _data[i] = parseHashLong(st.get(i));
+ }
+ else {
+ throw new NotImplementedException("Not supported type
of array: " + value.getClass().getSimpleName());
+ }
}
@Override
- public void setFromOtherTypeNz(int rl, int ru, Array<?> value) {
- final ValueType vt = value.getValueType();
- for(int i = rl; i <= ru; i++) {
- long v = UtilFunctions.objectToLong(vt, value.get(i));
- if(v != 0)
- _data[i] = v;
- }
+ public void append(Object value) {
+ append(parseHashLong(value));
}
@Override
public void append(String value) {
- append(parseLong(value));
+ append(parseHashLong(value));
}
- @Override
- public void append(Long value) {
+ public void append(long value) {
if(_data.length <= _size)
_data = Arrays.copyOf(_data, newSize());
- _data[_size++] = (value != null) ? value : 0L;
+ _data[_size++] = value;
}
@Override
- public Array<Long> append(Array<Long> other) {
- final int endSize = this._size + other.size();
- final long[] ret = new long[endSize];
- System.arraycopy(_data, 0, ret, 0, this._size);
- System.arraycopy(other.get(), 0, ret, this._size, other.size());
- if(other instanceof OptionalArray)
- return OptionalArray.appendOther((OptionalArray<Long>)
other, new LongArray(ret));
- else
- return new LongArray(ret);
+ public Array<Object> append(Array<Object> other) {
+ if(other instanceof HashLongArray) {
+
+ final int endSize = this._size + other.size();
+ final long[] ret = new long[endSize];
+ System.arraycopy(_data, 0, ret, 0, this._size);
+ System.arraycopy(((HashLongArray) other)._data, 0, ret,
this._size, other.size());
+ if(other instanceof OptionalArray)
+ return
OptionalArray.appendOther((OptionalArray<Object>) other, new
HashLongArray(ret));
+ else
+ return new HashLongArray(ret);
+ }
+ else if(other instanceof OptionalArray) {
+
+ OptionalArray<Object> ot = (OptionalArray<Object>)
other;
+ if(ot._a instanceof HashLongArray) {
+ Array<Object> a = this.append((HashLongArray)
ot._a);
+ return OptionalArray.appendOther(ot, a);
+ }
+ else {
+ throw new NotImplementedException("Invalid call
with not hashArray");
+ }
+ }
+ else {
+ throw new
NotImplementedException(other.getClass().getSimpleName() + " not append
supported in hashColumn");
+ }
}
@Override
public void write(DataOutput out) throws IOException {
- out.writeByte(FrameArrayType.INT64.ordinal());
+ out.writeByte(FrameArrayType.HASH64.ordinal());
for(int i = 0; i < _size; i++)
out.writeLong(_data[i]);
}
@@ -147,13 +183,13 @@ public class LongArray extends Array<Long> {
}
@Override
- public Array<Long> clone() {
- return new LongArray(Arrays.copyOf(_data, _size));
+ public Array<Object> clone() {
+ return new HashLongArray(Arrays.copyOf(_data, _size));
}
@Override
- public Array<Long> slice(int rl, int ru) {
- return new LongArray(Arrays.copyOfRange(_data, rl, ru));
+ public Array<Object> slice(int rl, int ru) {
+ return new HashLongArray(Arrays.copyOfRange(_data, rl, ru));
}
@Override
@@ -168,26 +204,22 @@ public class LongArray extends Array<Long> {
@Override
public byte[] getAsByteArray() {
- ByteBuffer longBuffer = ByteBuffer.allocate(8 * _size);
- longBuffer.order(ByteOrder.LITTLE_ENDIAN);
- for(int i = 0; i < _size; i++)
- longBuffer.putLong(_data[i]);
- return longBuffer.array();
+ throw new NotImplementedException("Unclear how this byte array
should look like for Hash");
}
@Override
public ValueType getValueType() {
- return ValueType.INT64;
+ return ValueType.HASH64;
}
@Override
public Pair<ValueType, Boolean> analyzeValueType() {
- return new Pair<>(ValueType.INT64, false);
+ return new Pair<>(ValueType.HASH64, false);
}
@Override
public FrameArrayType getFrameArrayType() {
- return FrameArrayType.INT64;
+ return FrameArrayType.HASH64;
}
@Override
@@ -246,7 +278,7 @@ public class LongArray extends Array<Long> {
protected Array<Integer> changeTypeInteger() {
int[] ret = new int[size()];
for(int i = 0; i < size(); i++) {
- if(Math.abs(_data[i]) > Integer.MAX_VALUE )
+ if(Math.abs(_data[i]) > Integer.MAX_VALUE)
throw new DMLRuntimeException("Unable to change
to integer from long array because of value:" + _data[i]);
ret[i] = (int) _data[i];
}
@@ -255,6 +287,11 @@ public class LongArray extends Array<Long> {
@Override
protected Array<Long> changeTypeLong() {
+ return new LongArray(_data);
+ }
+
+ @Override
+ protected Array<Object> changeTypeHash64() {
return this;
}
@@ -268,13 +305,16 @@ public class LongArray extends Array<Long> {
@Override
public void fill(String value) {
- fill(parseLong(value));
+ fill(parseHashLong(value));
}
@Override
+ public void fill(Object value) {
+ fill(parseHashLong(value));
+ }
+
public void fill(Long value) {
- value = value != null ? value : 0L;
- Arrays.fill(_data, value);
+ Arrays.fill(_data, value != null ? value : 0L);
}
@Override
@@ -282,18 +322,21 @@ public class LongArray extends Array<Long> {
return _data[i];
}
- public static long parseLong(String s) {
+ public static long parseHashLong(Object s) {
+ if(s == null)
+ return 0L;
+ else if(s instanceof String)
+ return parseHashLong((String) s);
+ else if(s instanceof Long)
+ return (Long) s;
+ else
+ throw new NotImplementedException("not supported" + s);
+ }
+
+ public static long parseHashLong(String s) {
if(s == null || s.isEmpty())
- return 0;
- try {
- return Long.parseLong(s);
- }
- catch(NumberFormatException e) {
- if(s.contains("."))
- return (long) Double.parseDouble(s);
- else
- throw e;
- }
+ return 0L;
+ return Long.parseUnsignedLong(s, 16);
}
@Override
@@ -318,21 +361,21 @@ public class LongArray extends Array<Long> {
}
@Override
- public Array<Long> select(int[] indices) {
+ public Array<Object> select(int[] indices) {
final long[] ret = new long[indices.length];
for(int i = 0; i < indices.length; i++)
ret[i] = _data[indices[i]];
- return new LongArray(ret);
+ return new HashLongArray(ret);
}
@Override
- public Array<Long> select(boolean[] select, int nTrue) {
+ public Array<Object> select(boolean[] select, int nTrue) {
final long[] ret = new long[nTrue];
int k = 0;
for(int i = 0; i < select.length; i++)
if(select[i])
ret[k++] = _data[i];
- return new LongArray(ret);
+ return new HashLongArray(ret);
}
@Override
@@ -346,15 +389,15 @@ public class LongArray extends Array<Long> {
}
@Override
- public boolean equals(Array<Long> other) {
- if(other instanceof LongArray)
- return Arrays.equals(_data, ((LongArray) other)._data);
+ public boolean equals(Array<Object> other) {
+ if(other instanceof HashLongArray)
+ return Arrays.equals(_data, ((HashLongArray)
other)._data);
else
return false;
}
@Override
- public boolean possiblyContainsNaN(){
+ public boolean possiblyContainsNaN() {
return false;
}
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/IntegerArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/IntegerArray.java
index df60803dda..4a180e264c 100644
---
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/IntegerArray.java
+++
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/IntegerArray.java
@@ -255,6 +255,14 @@ public class IntegerArray extends Array<Integer> {
return new LongArray(ret);
}
+ @Override
+ protected Array<Object> changeTypeHash64() {
+ long[] ret = new long[size()];
+ for(int i = 0; i < size(); i++)
+ ret[i] = _data[i];
+ return new HashLongArray(ret);
+ }
+
@Override
protected Array<String> changeTypeString() {
String[] ret = new String[size()];
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
index c1e0fe06c9..4d90190f67 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/LongArray.java
@@ -258,6 +258,11 @@ public class LongArray extends Array<Long> {
return this;
}
+ @Override
+ protected Array<Object> changeTypeHash64() {
+ return new HashLongArray(_data);
+ }
+
@Override
protected Array<String> changeTypeString() {
String[] ret = new String[size()];
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
index 99444015d4..6699f1050a 100644
---
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
+++
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/OptionalArray.java
@@ -63,6 +63,17 @@ public class OptionalArray<T> extends Array<T> {
}
}
+ @SuppressWarnings("unchecked")
+ public OptionalArray(T[] a, ValueType vt){
+ super(a.length);
+ _a = (Array<T>) ArrayFactory.allocate(vt, a.length);
+ _n = ArrayFactory.allocateBoolean(a.length);
+ for(int i = 0; i < a.length; i++) {
+ _a.set(i, a[i]);
+ _n.set(i, a[i] != null);
+ }
+ }
+
public OptionalArray(Array<T> a, boolean empty) {
super(a.size());
if(a instanceof OptionalArray)
@@ -342,6 +353,12 @@ public class OptionalArray<T> extends Array<T> {
return new OptionalArray<>(a, _n);
}
+ @Override
+ protected Array<Object> changeTypeHash64() {
+ Array<Object> a = _a.changeTypeHash64();
+ return new OptionalArray<>(a, _n);
+ }
+
@Override
protected Array<Character> changeTypeCharacter() {
Array<Character> a = _a.changeTypeCharacter();
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
index a63026b148..94a30f4980 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/RaggedArray.java
@@ -288,6 +288,11 @@ public class RaggedArray<T> extends Array<T> {
return _a.changeTypeLong();
}
+ @Override
+ protected Array<Object> changeTypeHash64() {
+ return _a.changeTypeHash64();
+ }
+
@Override
protected Array<String> changeTypeString() {
return _a.changeTypeString();
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
index fd86286972..03c2c7cc82 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/columns/StringArray.java
@@ -22,7 +22,6 @@ package org.apache.sysds.runtime.frame.data.columns;
import java.io.DataInput;
import java.io.DataOutput;
import java.io.IOException;
-import java.nio.charset.Charset;
import java.util.Arrays;
import java.util.BitSet;
import java.util.HashMap;
@@ -236,11 +235,17 @@ public class StringArray extends Array<String> {
}
private static final ValueType getHighest(ValueType state, ValueType c)
{
-
switch(state) {
+ case FP64:
+ switch(c) {
+ case HASH64:
+ return c;
+ default:
+ }
case FP32:
switch(c) {
case FP64:
+ case HASH64:
return c;
default:
}
@@ -249,6 +254,7 @@ public class StringArray extends Array<String> {
switch(c) {
case FP64:
case FP32:
+ case HASH64:
return c;
default:
}
@@ -258,6 +264,7 @@ public class StringArray extends Array<String> {
case FP64:
case FP32:
case INT64:
+ case HASH64:
return c;
default:
}
@@ -269,6 +276,7 @@ public class StringArray extends Array<String> {
case INT64:
case INT32:
case CHARACTER:
+ case HASH64:
return c;
default:
}
@@ -286,9 +294,8 @@ public class StringArray extends Array<String> {
boolean nulls = false;
for(int i = 0; i < _size; i++) {
final ValueType c = FrameUtil.isType(_data[i], state);
- if(c == ValueType.STRING) {
+ if(c == ValueType.STRING)
return new Pair<>(ValueType.STRING, false);
- }
else if(c == ValueType.UNKNOWN)
nulls = true;
else
@@ -560,6 +567,22 @@ public class StringArray extends Array<String> {
}
}
+ @Override
+ protected Array<Object> changeTypeHash64() {
+ try {
+ long[] ret = new long[size()];
+ for(int i = 0; i < size(); i++) {
+ final String s = _data[i];
+ if(s != null)
+ ret[i] = Long.parseLong(s, 16);
+ }
+ return new HashLongArray(ret);
+ }
+ catch(NumberFormatException e) {
+ throw new DMLRuntimeException("Unable to change to
Hash64 from String array", e);
+ }
+ }
+
@Override
public Array<Character> changeTypeCharacter() {
char[] ret = new char[size()];
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameLibApplySchema.java
b/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameLibApplySchema.java
index 92372ecab2..f782933307 100644
---
a/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameLibApplySchema.java
+++
b/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameLibApplySchema.java
@@ -20,6 +20,7 @@
package org.apache.sysds.runtime.frame.data.lib;
import java.util.ArrayList;
+import java.util.Arrays;
import java.util.List;
import java.util.concurrent.ExecutionException;
import java.util.concurrent.ExecutorService;
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameUtil.java
b/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameUtil.java
index 705aeb24c3..309560c46d 100644
--- a/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameUtil.java
+++ b/src/main/java/org/apache/sysds/runtime/frame/data/lib/FrameUtil.java
@@ -122,6 +122,18 @@ public interface FrameUtil {
return null;
}
+ public static ValueType isHash(final String val, final int len) {
+ if(len == 8) {
+ for(int i = 0; i < 8; i++) {
+ char v = val.charAt(i);
+ if(v < '0' || v > 'f')
+ return null;
+ }
+ return ValueType.HASH64;
+ }
+ return null;
+ }
+
public static ValueType isFloatType(final String val, final int len) {
if(len <= 30 && (simpleFloatMatch(val, len) ||
floatPattern.matcher(val).matches())) {
if(len <= 7 || (len == 8 && val.charAt(0) == '-'))
@@ -169,7 +181,7 @@ public interface FrameUtil {
final char c = val.charAt(i);
if(c >= '0' && c <= '9')
continue;
- else if(c == '.' || c == ','){
+ else if(c == '.' || c == ',') {
if(encounteredDot == true)
return false;
else
@@ -209,7 +221,7 @@ public interface FrameUtil {
switch(minType) {
case UNKNOWN:
case BOOLEAN:
- // case CHARACTER:
+ // case CHARACTER:
if(isBooleanType(val, len) != null)
return ValueType.BOOLEAN;
case UINT8:
@@ -226,6 +238,10 @@ public interface FrameUtil {
case CHARACTER:
if(len == 1)
return ValueType.CHARACTER;
+ case HASH64:
+ r = isHash(val, len);
+ if(r != null)
+ return r;
case STRING:
default:
return ValueType.STRING;
diff --git a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
index 967855814f..b46792da02 100644
--- a/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
+++ b/src/main/java/org/apache/sysds/runtime/util/UtilFunctions.java
@@ -46,6 +46,7 @@ import org.apache.sysds.runtime.data.SparseBlock;
import org.apache.sysds.runtime.data.TensorIndexes;
import org.apache.sysds.runtime.frame.data.FrameBlock;
import org.apache.sysds.runtime.frame.data.columns.CharArray;
+import org.apache.sysds.runtime.frame.data.columns.HashLongArray;
import org.apache.sysds.runtime.instructions.spark.data.IndexedMatrixValue;
import org.apache.sysds.runtime.matrix.data.MatrixIndexes;
import org.apache.sysds.runtime.matrix.data.Pair;
@@ -483,15 +484,16 @@ public class UtilFunctions {
public static Object stringToObject(ValueType vt, String in) {
if( in == null || in.isEmpty() ) return null;
switch( vt ) {
- case STRING: return in;
- case BOOLEAN: return Boolean.parseBoolean(in);
+ case STRING: return in;
+ case BOOLEAN: return Boolean.parseBoolean(in);
case UINT4:
case UINT8:
- case INT32: return Integer.parseInt(in);
- case INT64: return Long.parseLong(in);
- case FP64: return Double.parseDouble(in);
- case FP32: return Float.parseFloat(in);
+ case INT32: return Integer.parseInt(in);
+ case INT64: return Long.parseLong(in);
+ case FP64: return Double.parseDouble(in);
+ case FP32: return Float.parseFloat(in);
case CHARACTER: return CharArray.parseChar(in);
+ case HASH64: return HashLongArray.parseHashLong(in);
default: throw new RuntimeException("Unsupported value
type: "+vt);
}
}
@@ -674,7 +676,7 @@ public class UtilFunctions {
public static Object objectToObject(ValueType vt, Object in) {
if( in instanceof Double && vt == ValueType.FP64
|| in instanceof Float && vt == ValueType.FP32
- || in instanceof Long && vt == ValueType.INT64
+ || in instanceof Long && (vt == ValueType.INT64 || vt
== ValueType.HASH64)
|| in instanceof Integer && vt == ValueType.INT32
|| in instanceof Boolean && vt == ValueType.BOOLEAN
|| in instanceof String && vt == ValueType.STRING )
diff --git a/src/test/java/org/apache/sysds/test/TestUtils.java
b/src/test/java/org/apache/sysds/test/TestUtils.java
index 45fe79a4a3..acda5eaf83 100644
--- a/src/test/java/org/apache/sysds/test/TestUtils.java
+++ b/src/test/java/org/apache/sysds/test/TestUtils.java
@@ -2549,6 +2549,7 @@ public class TestUtils {
case INT32: return random.nextInt();
case INT64: return random.nextLong();
case BOOLEAN: return random.nextBoolean();
+ case HASH64: return
Long.toHexString(random.nextLong());
case STRING:
return random.ints('a', 'z' + 1)
.limit(10)
diff --git
a/src/test/java/org/apache/sysds/test/component/frame/array/CustomArrayTests.java
b/src/test/java/org/apache/sysds/test/component/frame/array/CustomArrayTests.java
index f0dcbf9c6e..94a5810bf4 100644
---
a/src/test/java/org/apache/sysds/test/component/frame/array/CustomArrayTests.java
+++
b/src/test/java/org/apache/sysds/test/component/frame/array/CustomArrayTests.java
@@ -45,6 +45,7 @@ import org.apache.sysds.runtime.frame.data.columns.CharArray;
import org.apache.sysds.runtime.frame.data.columns.DDCArray;
import org.apache.sysds.runtime.frame.data.columns.DoubleArray;
import org.apache.sysds.runtime.frame.data.columns.FloatArray;
+import org.apache.sysds.runtime.frame.data.columns.HashLongArray;
import org.apache.sysds.runtime.frame.data.columns.IntegerArray;
import org.apache.sysds.runtime.frame.data.columns.LongArray;
import org.apache.sysds.runtime.frame.data.columns.OptionalArray;
@@ -857,7 +858,7 @@ public class CustomArrayTests {
try {
Array<Long> a = null;
Array<Long> b = new DDCArray<Long>(new LongArray(new
long[] {1, 2, 3, 4}), //
- MapToFactory.create(10, new int[] {0, 0, 0, 0,
1, 1, 1, 2, 2, 3,3}, 4));
+ MapToFactory.create(10, new int[] {0, 0, 0, 0,
1, 1, 1, 2, 2, 3, 3}, 4));
Array<Long> c = ArrayFactory.set(a, b, 10, 19, 20);
assertEquals((long) c.get(0), 0L);
assertEquals((long) c.get(10), 1L);
@@ -873,7 +874,7 @@ public class CustomArrayTests {
try {
Array<Long> a = null;
Array<Long> b = new DDCArray<Long>(new
OptionalArray<Long>(new Long[] {1L, 2L, 3L, 4L}), //
- MapToFactory.create(10, new int[] {0, 0, 0, 0,
1, 1, 1, 2, 2, 3,3}, 4));
+ MapToFactory.create(10, new int[] {0, 0, 0, 0,
1, 1, 1, 2, 2, 3, 3}, 4));
Array<Long> c = ArrayFactory.set(a, b, 10, 19, 20);
assertEquals(c.get(0), null);
assertEquals((long) c.get(10), 1L);
@@ -884,8 +885,6 @@ public class CustomArrayTests {
}
}
-
-
@Test
public void testSetOptionalB() {
try {
@@ -1364,4 +1363,52 @@ public class CustomArrayTests {
assertEquals(a.hashDouble(i), Double.NaN, 0.0);
}
}
+
+ // The tests below pin HashLongArray.parseHashLong on base-16 input of growing length:
+ // one to eight 'f' digits, and finally a full 16 digit value.
+ @Test
+ public void parseHash() {
+ assertEquals(10, HashLongArray.parseHashLong("a"));
+ }
+
+ @Test
+ public void parseHash_ff() {
+ assertEquals(255, HashLongArray.parseHashLong("ff"));
+ }
+
+ @Test
+ public void parseHash_fff() {
+ assertEquals(4095, HashLongArray.parseHashLong("fff"));
+ }
+
+ @Test
+ public void parseHash_ffff() {
+ assertEquals(65535, HashLongArray.parseHashLong("ffff"));
+ }
+
+
+ @Test
+ public void parseHash_fffff() {
+ assertEquals(1048575, HashLongArray.parseHashLong("fffff"));
+ }
+
+ @Test
+ public void parseHash_ffffff() {
+ assertEquals(16777215, HashLongArray.parseHashLong("ffffff"));
+ }
+
+ @Test
+ public void parseHash_fffffff() {
+ assertEquals(268435455L,
HashLongArray.parseHashLong("fffffff"));
+ }
+
+
+ @Test
+ public void parseHash_ffffffff() {
+ // Eight digits exceed Integer.MAX_VALUE, so the expected value is a long literal.
+ assertEquals(4294967295L,
HashLongArray.parseHashLong("ffffffff"));
+ }
+
+ @Test
+ public void parseHash_ffffffff_ffffffff() {
+ // Sixteen 'f' digits set all 64 bits; the expected signed long value is -1.
+ assertEquals(-1,
HashLongArray.parseHashLong("ffffffffffffffff"));
+ }
+
}
diff --git
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayConstantTests.java
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayConstantTests.java
index ca707b7156..645eb30ad4 100644
---
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayConstantTests.java
+++
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayConstantTests.java
@@ -102,6 +102,8 @@ public class FrameArrayConstantTests {
@Test
public void testConstruction_1() {
try {
+ if(t == ValueType.HASH64)
+ return;
Array<?> a = ArrayFactory.allocate(t, nRow, "1.0");
for(int i = 0; i < nRow; i++)
assertEquals(a.getAsDouble(i), 1.0,
0.0000000001);
diff --git
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
index 35d4d0e87c..71211ab52c 100644
---
a/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
+++
b/src/test/java/org/apache/sysds/test/component/frame/array/FrameArrayTests.java
@@ -49,6 +49,7 @@ import org.apache.sysds.runtime.frame.data.columns.CharArray;
import org.apache.sysds.runtime.frame.data.columns.DDCArray;
import org.apache.sysds.runtime.frame.data.columns.DoubleArray;
import org.apache.sysds.runtime.frame.data.columns.FloatArray;
+import org.apache.sysds.runtime.frame.data.columns.HashLongArray;
import org.apache.sysds.runtime.frame.data.columns.IntegerArray;
import org.apache.sysds.runtime.frame.data.columns.LongArray;
import org.apache.sysds.runtime.frame.data.columns.OptionalArray;
@@ -377,7 +378,7 @@ public class FrameArrayTests {
@Test
public void setWithDDC() {
if(a.size() > 31) {
- try{
+ try {
Array<?> t = a.clone();
Array<?> ddc = DDCArray.compressToDDC(//
@@ -388,20 +389,20 @@ public class FrameArrayTests {
assertEquals(t.get(0),
(Boolean) false);
break;
default:
-
+
}
}
- catch(DMLCompressionException e){
+ catch(DMLCompressionException e) {
// valid error, Illegal to set range in a
compressed array.
}
- catch(DMLRuntimeException e){
+ catch(DMLRuntimeException e) {
// is intentional here.
- if(!e.getMessage().contains("RaggedArray")){
+ if(!e.getMessage().contains("RaggedArray")) {
e.printStackTrace();
fail(e.getMessage());
}
}
- catch(Exception e){
+ catch(Exception e) {
e.printStackTrace();
fail(e.getMessage());
}
@@ -468,6 +469,7 @@ public class FrameArrayTests {
x = a.get();
break;
case RAGGED:
+ case HASH64:
case OPTIONAL:
try {
a.get();
@@ -538,6 +540,9 @@ public class FrameArrayTests {
case CHARACTER:
((Array<Character>) aa).set(start, end,
(Array<Character>) a, off);
break;
+ case HASH64:
+ ((Array<Object>) aa).set(start, end,
(Array<Object>) a, off);
+ break;
default:
throw new NotImplementedException();
}
@@ -593,6 +598,9 @@ public class FrameArrayTests {
case CHARACTER:
((Array<Character>) aa).set(start, end,
(Array<Character>) other);
break;
+ case HASH64:
+ ((Array<Object>) aa).set(start, end,
(Array<Object>) other);
+ break;
default:
throw new NotImplementedException();
}
@@ -602,6 +610,9 @@ public class FrameArrayTests {
catch(DMLCompressionException e) {
return;// valid
}
+ catch(NumberFormatException e){
+ return; // valid
+ }
catch(Exception e) {
e.printStackTrace();
fail(e.getMessage());
@@ -650,6 +661,16 @@ public class FrameArrayTests {
((Array<Character>) a).set(0, c);
assertEquals(((Array<Character>)
a).get(0), c);
return;
+ case HASH64:
+ String hash = "abcdefaaaa";
+ ((Array<Object>) a).set(0, hash);
+ assertEquals(((Array<Object>)
a).get(0), hash);
+ if(a instanceof HashLongArray) {
+ long hashL =
Long.parseUnsignedLong("abcdefaaaa", 16);
+ ((HashLongArray) a).set(0,
hashL);
+ assertEquals(((HashLongArray)
a).get(0), hash);
+ }
+ return;
default:
throw new NotImplementedException();
}
@@ -689,6 +710,9 @@ public class FrameArrayTests {
case CHARACTER:
assertEquals((int) ((Array<Character>)
a).get(0), 1);
return;
+ case HASH64:
+ assertEquals(((Array<Object>)
a).get(0), "1");
+ return;
default:
throw new NotImplementedException();
}
@@ -728,6 +752,9 @@ public class FrameArrayTests {
case CHARACTER:
assertEquals(((Array<Character>)
a).get(0), Character.valueOf((char) 0));
return;
+ case HASH64:
+ assertEquals(((Array<Object>)
a).get(0), "0");
+ return;
default:
throw new NotImplementedException();
}
@@ -928,6 +955,15 @@ public class FrameArrayTests {
aa.append(vci);
assertEquals((char) aa.get(aa.size() -
1), vc);
break;
+ case HASH64:
+ String hash = "aaaab";
+ aa.append(hash);
+ assertEquals(aa.get(aa.size() - 1),
hash);
+
+ hash = "abbbbaa";
+ aa.append(hash);
+ assertEquals(aa.get(aa.size() - 1),
hash);
+ break;
case UNKNOWN:
default:
throw new DMLRuntimeException("Invalid
type");
@@ -973,6 +1009,9 @@ public class FrameArrayTests {
case CHARACTER:
assertEquals((char)
aa.get(aa.size() - 1), 0);
break;
+ case HASH64:
+ assertEquals(aa.get(aa.size() -
1), "0");
+ break;
case UNKNOWN:
default:
throw new
DMLRuntimeException("Invalid type");
@@ -1020,6 +1059,9 @@ public class FrameArrayTests {
case CHARACTER:
assertEquals((char)
aa.get(aa.size() - 1), 0);
break;
+ case HASH64:
+ assertEquals(aa.get(aa.size() -
1), "0");
+ break;
case UNKNOWN:
default:
throw new
DMLRuntimeException("Invalid type");
@@ -1060,6 +1102,9 @@ public class FrameArrayTests {
case CHARACTER:
((Array<Character>)
aa).setNz((Array<Character>) a);
break;
+ case HASH64:
+ ((Array<Object>)
aa).setNz((Array<Object>) a);
+ break;
case UNKNOWN:
default:
throw new DMLRuntimeException("Invalid
type");
@@ -1082,7 +1127,6 @@ public class FrameArrayTests {
Array<?> aa = a.clone();
Array<String> af = (Array<String>)
aa.changeType(ValueType.STRING);
try {
-
aa.setFromOtherTypeNz(af);
}
catch(DMLCompressionException e) {
@@ -1102,7 +1146,6 @@ public class FrameArrayTests {
Array<?> aa = a.clone();
Array<String> af = (Array<String>)
aa.changeTypeWithNulls(ValueType.STRING);
try {
-
aa.setFromOtherTypeNz(af);
}
catch(DMLCompressionException e) {
@@ -1122,7 +1165,6 @@ public class FrameArrayTests {
Array<?> aa = a.clone();
Array<String> af = (Array<String>)
aa.changeType(ValueType.STRING);
try {
-
aa.setFromOtherType(0, af.size() - 1, af);
}
catch(DMLCompressionException e) {
@@ -1140,8 +1182,11 @@ public class FrameArrayTests {
public void testSetFromStringWithNull() {
Array<?> aa = a.clone();
Array<?> af;
- if(aa.getFrameArrayType() == FrameArrayType.OPTIONAL &&
aa.getValueType() != ValueType.STRING)
+ if(aa.getFrameArrayType() == FrameArrayType.OPTIONAL //
+ && aa.getValueType() != ValueType.STRING //
+ && aa.getValueType() != ValueType.HASH64) {
af = aa.changeTypeWithNulls(ValueType.FP64);
+ }
else
af = aa.changeTypeWithNulls(ValueType.STRING);
@@ -1289,7 +1334,6 @@ public class FrameArrayTests {
((Array<Character>) aa).set(0,
(Character) null);
assertTrue(aa.get(0) == null ||
aa.get(0).equals(Character.valueOf((char) 0)));
break;
-
case FP32:
((Array<Float>) aa).set(0, (Float)
null);
assertTrue(aa.get(0) == null ||
aa.get(0).equals(Float.valueOf(0.0f)));
@@ -1310,12 +1354,17 @@ public class FrameArrayTests {
((Array<Integer>) aa).set(0, (Integer)
null);
assertTrue(aa.get(0) == null ||
aa.get(0).equals(Integer.valueOf(0)));
break;
- default:
+ case HASH64:
+ aa.set(0, (String) null);
+ assertTrue(aa.get(0) == null ||
aa.get(0).equals("0"));
+ break;
case STRING:
case UNKNOWN:
aa.set(0, (String) null);
assertTrue(aa.get(0) == null);
break;
+ default:
+ throw new NotImplementedException();
}
}
catch(DMLCompressionException e) {
@@ -1374,6 +1423,12 @@ public class FrameArrayTests {
for(int i = 0; i < 10; i++)
assertEquals(aa.get(i +
a.size()), null);
break;
+ case HASH64:
+ aa = ((Array<Object>) aa).append(new
HashLongArray(new long[10]));
+ assertEquals(aa.size(), a.size() + 10);
+ for(int i = 0; i < 10; i++)
+ assertEquals(aa.get(i +
a.size()), "0");
+ break;
case UNKNOWN:
default:
throw new NotImplementedException("Not
supported");
@@ -1385,6 +1440,10 @@ public class FrameArrayTests {
catch(DMLCompressionException e) {
return; // valid
}
+ catch(Exception e) {
+ e.printStackTrace();
+ fail(e.getMessage());
+ }
}
@@ -1439,6 +1498,12 @@ public class FrameArrayTests {
if(!isOptional)
assertEquals(aa.get(a.size()),
null);
break;
+ case HASH64:
+ aa.append((String) null);
+ assertEquals(aa.size(), a.size() + 1);
+ if(!isOptional)
+ assertEquals(aa.get(a.size()),
"0");
+ break;
case UNKNOWN:
default:
throw new NotImplementedException("Not
supported");
@@ -1490,6 +1555,9 @@ public class FrameArrayTests {
case INT64:
aa = ((Array<Long>) aa).append(new
OptionalArray<>(new Long[10]));
break;
+ case HASH64:
+ aa = ((Array<Object>) aa).append(new
OptionalArray<>(new HashLongArray(new long[10]), true));
+ break;
case STRING:
return; // not relevant
case UNKNOWN:
@@ -1555,6 +1623,11 @@ public class FrameArrayTests {
for(int i = 0; i < aa.size();
i++)
assertEquals(aa.get(i),
null);
break;
+ case HASH64:
+ if(!isOptional)
+ for(int i = 0; i < aa.size();
i++)
+ assertEquals(aa.get(i),
"0");
+ break;
case UNKNOWN:
default:
throw new NotImplementedException("Not
supported");
@@ -1567,6 +1640,10 @@ public class FrameArrayTests {
catch(DMLCompressionException e) {
return;// valid
}
+ catch(Exception e) {
+ e.printStackTrace();
+ fail(e.getMessage());
+ }
}
@Test
@@ -1606,6 +1683,10 @@ public class FrameArrayTests {
for(int i = 0; i < aa.size(); i++)
assertEquals(aa.get(i), "1");
break;
+ case HASH64:
+ for(int i = 0; i < aa.size(); i++)
+ assertEquals(aa.get(i), "1");
+ break;
case UNKNOWN:
default:
throw new NotImplementedException("Not
supported");
@@ -1659,6 +1740,11 @@ public class FrameArrayTests {
for(int i = 0; i < aa.size(); i++)
assertEquals(aa.get(i), "1");
break;
+ case HASH64:
+ aa.fill("1");
+ for(int i = 0; i < aa.size(); i++)
+ assertEquals(aa.get(i), "1");
+ break;
case UNKNOWN:
default:
throw new NotImplementedException("Not
supported");
@@ -1721,6 +1807,12 @@ public class FrameArrayTests {
for(int i = 0; i < aa.size();
i++)
assertEquals(aa.get(i),
null);
break;
+ case HASH64:
+ ((Array<Object>) aa).fill((Object)
null);
+ if(!isOptional)
+ for(int i = 0; i < aa.size();
i++)
+ assertEquals(aa.get(i),
"0");
+ break;
case UNKNOWN:
default:
throw new NotImplementedException("Not
supported");
@@ -1788,7 +1880,6 @@ public class FrameArrayTests {
}
catch(Exception e) {
e.printStackTrace();
- LOG.error(a);
fail(e.getMessage());
}
}
@@ -1868,7 +1959,7 @@ public class FrameArrayTests {
DataOutputStream fos = new DataOutputStream(bos);
g.write(fos);
DataInputStream fis = new DataInputStream(new
ByteArrayInputStream(bos.toByteArray()));
- Array<?> gr = ArrayFactory.read(fis, nRow);
+ Array<?> gr = ArrayFactory.read(fis, nRow);
return gr;
}
catch(Exception e) {
@@ -1900,6 +1991,9 @@ public class FrameArrayTests {
case CHARACTER:
return DDCArray
.compressToDDC(ArrayFactory.create(generateRandomCharacterNUniqueLengthOpt(size,
seed, nUnique)));
+ case HASH64:
+ return DDCArray
+
.compressToDDC(ArrayFactory.createHash64(generateRandomHash64OptNUnique(size,
seed, nUnique)));
case OPTIONAL:
Random r = new Random(seed);
switch(r.nextInt(7)) {
@@ -1985,6 +2079,8 @@ public class FrameArrayTests {
return
ArrayFactory.create(generateRandomDoubleOpt(size, seed));
case CHARACTER:
return
ArrayFactory.create(generateRandomCharacterOpt(size, seed));
+ case HASH64:
+ return
ArrayFactory.createHash64Opt(generateRandomHash64Opt(size, seed));
case OPTIONAL:
case RAGGED: // lets not test this case here.
Random r = new Random(seed);
@@ -2051,6 +2147,8 @@ public class FrameArrayTests {
return
ArrayFactory.create(generateRandomDouble(size, seed));
case CHARACTER:
return
ArrayFactory.create(generateRandomChar(size, seed));
+ case HASH64:
+ return
ArrayFactory.createHash64(generateRandomHash64(size, seed));
case RAGGED:
Random rand = new Random(seed);
switch(rand.nextInt(7)) {
@@ -2082,6 +2180,8 @@ public class FrameArrayTests {
return
ArrayFactory.create(generateRandomFloatOpt(size, seed));
case 4:
return
ArrayFactory.create(generateRandomCharacterOpt(size, seed));
+ case 5:
+ return
ArrayFactory.create(generateRandomHash64Opt(size, seed));
default:
return
ArrayFactory.create(generateRandomBooleanOpt(size, seed));
}
@@ -2163,6 +2263,18 @@ public class FrameArrayTests {
return ret;
}
+	/**
+	 * Generate an array of hex encoded hash strings drawn from a limited pool of candidates.
+	 *
+	 * The pool holds at least one entry, and its last entry is replaced by null so that null can be drawn.
+	 *
+	 * @param size    Number of cells to generate
+	 * @param seed    Seed for the candidate pool; seed + 1 is used for picking from the pool
+	 * @param nUnique Requested number of candidates, raised to 1 if smaller
+	 * @return An array of length size with entries picked from the candidate pool
+	 */
+	public static String[] generateRandomHash64OptNUnique(int size, int seed, int nUnique) {
+		final int poolSize = Math.max(1, nUnique);
+		final String[] candidates = generateRandomHash64(poolSize, seed);
+		candidates[candidates.length - 1] = null;
+
+		final Random picker = new Random(seed + 1);
+		final String[] out = new String[size];
+		for(int idx = 0; idx < out.length; idx++)
+			out[idx] = candidates[picker.nextInt(poolSize)];
+		return out;
+	}
+
public static Character[] generateRandomCharacterNUniqueLengthOpt(int
size, int seed, int nUnique) {
Character[] rands = generateRandomCharacterOpt(nUnique, seed);
rands[rands.length - 1] = null;
@@ -2228,6 +2340,25 @@ public class FrameArrayTests {
return ret;
}
+	/**
+	 * Generate an array of hex encoded random long values.
+	 *
+	 * @param size Number of cells to generate
+	 * @param seed Seed of the random generator
+	 * @return An array of length size where every cell is Long.toHexString of a random long
+	 */
+	public static String[] generateRandomHash64(int size, int seed) {
+		final String[] hashes = new String[size];
+		final Random rng = new Random(seed);
+		int idx = 0;
+		while(idx < size)
+			hashes[idx++] = Long.toHexString(rng.nextLong());
+		return hashes;
+	}
+
+	/**
+	 * Generate an array of hex encoded random long values where cells may be null.
+	 *
+	 * For each cell a random boolean decides whether a hash is generated or the cell is left null.
+	 *
+	 * @param size Number of cells to generate
+	 * @param seed Seed of the random generator
+	 * @return An array of length size containing hex strings and nulls
+	 */
+	public static String[] generateRandomHash64Opt(int size, int seed) {
+		final String[] out = new String[size];
+		final Random rng = new Random(seed);
+		for(int idx = 0; idx < size; idx++)
+			out[idx] = rng.nextBoolean() ? Long.toHexString(rng.nextLong()) : null;
+		return out;
+	}
+
public static String[] generateRandom01String(int size, int seed) {
Random r = new Random(seed);
String[] ret = new String[size];
diff --git
a/src/test/java/org/apache/sysds/test/component/frame/iterators/IteratorTest.java
b/src/test/java/org/apache/sysds/test/component/frame/iterators/IteratorTest.java
index c6f5bfd621..8ad57f3c52 100644
---
a/src/test/java/org/apache/sysds/test/component/frame/iterators/IteratorTest.java
+++
b/src/test/java/org/apache/sysds/test/component/frame/iterators/IteratorTest.java
@@ -22,6 +22,7 @@ package org.apache.sysds.test.component.frame.iterators;
import static org.junit.Assert.assertEquals;
import static org.junit.Assert.assertNotEquals;
import static org.junit.Assert.assertTrue;
+import static org.junit.Assert.fail;
import java.util.Arrays;
@@ -36,8 +37,20 @@ import org.junit.Test;
public class IteratorTest {
- private final FrameBlock fb1 = TestUtils.generateRandomFrameBlock(10,
10, 23);
- private final FrameBlock fb2 = TestUtils.generateRandomFrameBlock(40,
30, 22);
+ private final FrameBlock fb1;
+ private final FrameBlock fb2;
+
+	/**
+	 * Create the two random frames used by the tests in this class. Any exception thrown while generating
+	 * them is printed and reported as a test failure.
+	 */
+	public IteratorTest() {
+		final FrameBlock first;
+		final FrameBlock second;
+		try {
+			first = TestUtils.generateRandomFrameBlock(10, 10, 23);
+			second = TestUtils.generateRandomFrameBlock(40, 30, 22);
+		}
+		catch(Exception e) {
+			e.printStackTrace();
+			fail(e.getMessage());
+			// The throw tells the compiler that this branch never completes normally, so the
+			// assignments below always see initialized values.
+			throw new RuntimeException(e);
+		}
+		fb1 = first;
+		fb2 = second;
+	}
@Test
public void StringObjectStringFB1() {
@@ -236,29 +249,27 @@ public class IteratorTest {
compareIterators(a, b);
}
-
- @Test(expected= DMLRuntimeException.class)
- public void invalidRange1(){
+ @Test(expected = DMLRuntimeException.class)
+ public void invalidRange1() {
IteratorFactory.getStringRowIterator(fb2, -1, 1);
}
- @Test(expected= DMLRuntimeException.class)
- public void invalidRange2(){
+ @Test(expected = DMLRuntimeException.class)
+ public void invalidRange2() {
IteratorFactory.getStringRowIterator(fb2, 132415, 132416);
}
- @Test(expected= DMLRuntimeException.class)
- public void invalidRange3(){
+ @Test(expected = DMLRuntimeException.class)
+ public void invalidRange3() {
IteratorFactory.getStringRowIterator(fb2, 13, 4);
}
- @Test(expected= DMLRuntimeException.class)
- public void remove(){
- RowIterator<?> a =IteratorFactory.getStringRowIterator(fb2, 0,
4);
+ @Test(expected = DMLRuntimeException.class)
+ public void remove() {
+ RowIterator<?> a = IteratorFactory.getStringRowIterator(fb2, 0,
4);
a.remove();
}
-
private static void compareIterators(RowIterator<?> a, RowIterator<?>
b) {
while(a.hasNext()) {
assertTrue(b.hasNext());