This is an automated email from the ASF dual-hosted git repository.
mboehm7 pushed a commit to branch main
in repository https://gitbox.apache.org/repos/asf/systemds.git
The following commit(s) were added to refs/heads/main by this push:
new dc8f527ea5 [MINOR] Fix formatting (tabs vs spaces), warnings, and redundant code
dc8f527ea5 is described below
commit dc8f527ea5de5dde25abfb8f7a1aa5888a05e325
Author: Matthias Boehm <[email protected]>
AuthorDate: Fri Sep 1 14:12:06 2023 +0200
[MINOR] Fix formatting (tabs vs spaces), warnings, and redundant code
---
.../runtime/compress/io/CompressedWriteBlock.java | 2 +-
.../sysds/runtime/compress/utils/ACount.java | 2 +-
.../sysds/runtime/data/DenseBlockFP64DEDUP.java | 533 +++++++++++----------
.../runtime/frame/data/columns/ArrayFactory.java | 2 +-
.../gpu/context/CudaMemoryAllocator.java | 1 +
.../spark/data/PartitionedBroadcast.java | 1 -
.../sysds/runtime/matrix/data/LibMatrixMult.java | 2 +-
.../compress/CompressibleInputGenerator.java | 6 +-
.../sysds/test/component/compress/io/IOSpark.java | 4 +-
.../component/compress/lib/CLALibSliceTest.java | 190 ++++----
.../test/component/compress/util/CountMapTest.java | 11 +-
.../test/functions/io/binary/SerializeTest.java | 4 -
.../TransformFrameEncodeMultithreadedTest.java | 4 +-
.../TransformFrameEncodeWordEmbedding1Test.java | 220 ++++-----
.../TransformFrameEncodeWordEmbedding2Test.java | 515 ++++++++++----------
.../TransformFrameEncodeWordEmbeddingMMTest.java | 140 +++---
...ransformFrameEncodeWordEmbeddingRowSumTest.java | 382 +++++++--------
17 files changed, 1008 insertions(+), 1011 deletions(-)
diff --git
a/src/main/java/org/apache/sysds/runtime/compress/io/CompressedWriteBlock.java
b/src/main/java/org/apache/sysds/runtime/compress/io/CompressedWriteBlock.java
index 06600da46b..66e96241a4 100644
---
a/src/main/java/org/apache/sysds/runtime/compress/io/CompressedWriteBlock.java
+++
b/src/main/java/org/apache/sysds/runtime/compress/io/CompressedWriteBlock.java
@@ -33,7 +33,7 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock;
* either or.
*/
public class CompressedWriteBlock implements
WritableComparable<CompressedWriteBlock> , Serializable{
-
+ private static final long serialVersionUID = -587986086067463499L;
public MatrixBlock mb;
private enum CONTENT {
diff --git a/src/main/java/org/apache/sysds/runtime/compress/utils/ACount.java
b/src/main/java/org/apache/sysds/runtime/compress/utils/ACount.java
index 5b15d5a086..4634130430 100644
--- a/src/main/java/org/apache/sysds/runtime/compress/utils/ACount.java
+++ b/src/main/java/org/apache/sysds/runtime/compress/utils/ACount.java
@@ -41,7 +41,7 @@ public abstract class ACount<T> {
public abstract ACount<T> inc(T key, int c, int id);
public ACount<T> sort() {
- Sorter<T> s = new Sorter<T>();
+ Sorter<T> s = new Sorter<>();
s.sort(this);
return s.sorted;
}
diff --git
a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64DEDUP.java
b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64DEDUP.java
index 15433654c3..0d6b5cd9d5 100644
--- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64DEDUP.java
+++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64DEDUP.java
@@ -29,270 +29,271 @@ import java.util.HashMap;
public class DenseBlockFP64DEDUP extends DenseBlockDRB
{
- private double[][] _data;
- private int _distinct = 0;
-
- protected DenseBlockFP64DEDUP(int[] dims) {
- super(dims);
- reset(_rlen, _odims, 0);
- }
-
- public int getNrDistinctRows(){
- return _distinct;
- }
-
- @Override
- protected void allocateBlock(int bix, int length) {
- _data[bix] = new double[length];
- }
-
- @Override
- public void reset(int rlen, int[] odims, double v) {
- if(rlen > capacity() / _odims[0])
- _data = new double[rlen][];
- else {
- if(v == 0.0) {
- for(int i = 0; i < rlen; i++)
- _data[i] = null;
- }
- else {
- for(int i = 0; i < rlen; i++) {
- if(odims[0] > _odims[0] ||_data[i] == null )
- allocateBlock(i, odims[0]);
- Arrays.fill(_data[i], 0, odims[0], v);
- }
- }
- }
- _rlen = rlen;
- _odims = odims;
- }
-
- @Override
- public void resetNoFill(int rlen, int[] odims) {
- if(_data == null || rlen > _rlen){
- _data = new double[rlen][];
- }
- _rlen = rlen;
- _odims = odims;
- }
-
- @Override
- public boolean isNumeric() {
- return true;
- }
-
- @Override
- public boolean isNumeric(Types.ValueType vt) {
- return Types.ValueType.FP64 == vt;
- }
-
- @Override
- public long capacity() {
- return (_data != null) ? ((long) _data.length)*_odims[0] : -1;
- }
-
- @Override
- public long countNonZeros() {
- long nnz = 0;
- HashMap<double[], Long> cache = new HashMap<double[], Long>();
- for (int i = 0; i < _rlen; i++) {
- double[] row = this._data[i];
- if(row == null)
- continue;
- Long count = cache.getOrDefault(row, null);
- if(count == null){
- count = Long.valueOf(countNonZeros(i));
- cache.put(row, count);
- }
- nnz += count;
- }
- this._distinct = cache.size();
- return nnz;
- }
-
- @Override
- public int countNonZeros(int r) {
- return _data[r] == null ? 0 : UtilFunctions.computeNnz(_data[r], 0,
_odims[0]);
- }
-
- @Override
- public long countNonZeros(int rl, int ru, int ol, int ou) {
- long nnz = 0;
- HashMap<double[], Long> cache = new HashMap<double[], Long>();
- for (int i = rl; i < ru; i++) {
- double[] row = this._data[i];
- if(row == null)
- continue;
- Long count = cache.getOrDefault(row, null);
- if(count == null){
- count = Long.valueOf(UtilFunctions.computeNnz(_data[i], ol,
ou));
- cache.put(row, count);
- }
- nnz += count;
- }
- return nnz;
- }
-
- @Override
- protected long computeNnz(int bix, int start, int length) {
- int nnz = 0;
- int row_start = (int) Math.floor(start / _odims[0]);
- int col_start = start % _odims[0];
- for (int i = 0; i < length; i++) {
- if(_data[row_start] == null){
- i += _odims[0] - 1 - col_start;
- col_start = 0;
- row_start += 1;
- continue;
- }
- nnz += _data[row_start][col_start] != 0 ? 1 : 0;
- col_start += 1;
- if(col_start == _odims[0]) {
- col_start = 0;
- row_start += 1;
- }
- }
- return nnz;
- }
-
- @Override
- public int pos(int r){
- return 0;
- }
-
- @Override
- public int pos(int r, int c){
- return c;
- }
-
- @Override
- public int pos(int[] ix){
- int pos = ix[ix.length - 1];
- for(int i = 1; i < ix.length - 1; i++)
- pos += ix[i] * _odims[i];
- return pos;
- }
-
- @Override
- public int blockSize(int bix) {
- return 1;
- }
- public boolean isContiguous(int rl, int ru){
- return rl == ru;
- }
- @Override
- public double[] values(int r) {
- return valuesAt(r);
- }
-
- @Override
- public double[] valuesAt(int bix) {
- return _data[bix] == null ? new double[_odims[0]] : _data[bix];
- }
-
- @Override
- public int index(int r) {
- return r;
- }
-
- @Override
- public int numBlocks(){
- return _data.length;
- }
-
- @Override
- public int size(int bix) {
- return _odims[0];
- }
-
- @Override
- public void incr(int r, int c) {
- incr(r,c,1.0);
- }
-
- @Override
- public void incr(int r, int c, double delta) {
- if(_data[r] == null)
- allocateBlock(r, _odims[0]);
- _data[r][c] += delta;
- }
-
- @Override
- protected void fillBlock(int bix, int fromIndex, int toIndex, double v) {
- if(_data[bix] == null)
- allocateBlock(bix, _odims[0]);
- Arrays.fill(_data[bix], fromIndex, toIndex, v);
- }
-
- @Override
- protected void setInternal(int bix, int ix, double v) {
- set(bix, ix, v);
- }
-
- @Override
- public DenseBlock set(int r, int c, double v) {
- if(_data[r] == null)
- _data[r] = new double[_odims[0]];
- _data[r][c] = v;
- return this;
- }
-
- @Override
- public DenseBlock set(int r, double[] v) {
- if(v.length == _odims[0])
- _data[r] = v;
- else
- throw new RuntimeException("set Denseblock called with an array
length [" + v.length +"], array to overwrite is of length [" + _odims[0] + "]");
- return this;
- }
-
- @Override
- public DenseBlock set(DenseBlock db) {
- throw new NotImplementedException();
- }
-
- @Override
- public DenseBlock set(int[] ix, double v) {
- return set(ix[0], pos(ix), v);
- }
-
- @Override
- public DenseBlock set(int[] ix, long v) {
- return set(ix[0], pos(ix), v);
- }
-
- @Override
- public DenseBlock set(int[] ix, String v) {
- return set(ix[0], pos(ix), Double.parseDouble(v));
- }
-
- @Override
- public double get(int r, int c) {
- if(_data[r] == null)
- return 0.0;
- else
- return _data[r][c];
- }
-
- @Override
- public double get(int[] ix) {
- return get(ix[0], pos(ix));
- }
-
- @Override
- public String getString(int[] ix) {
- return String.valueOf(get(ix[0], pos(ix)));
- }
-
- @Override
- public long getLong(int[] ix) {
- return UtilFunctions.toLong(get(ix[0], pos(ix)));
- }
-
- public double estimateMemory(){
- if( (double)_rlen + this._odims[0] > Long.MAX_VALUE )
- return Long.MAX_VALUE;
- return DenseBlock.estimateMemory(_rlen, _odims[0])
- + MemoryEstimates.doubleArrayCost(_odims[0])*_distinct +
MemoryEstimates.objectArrayCost(_rlen);
- }
+ private static final long serialVersionUID = -4012376952006079198L;
+ private double[][] _data;
+ private int _distinct = 0;
+
+ protected DenseBlockFP64DEDUP(int[] dims) {
+ super(dims);
+ reset(_rlen, _odims, 0);
+ }
+
+ public int getNrDistinctRows(){
+ return _distinct;
+ }
+
+ @Override
+ protected void allocateBlock(int bix, int length) {
+ _data[bix] = new double[length];
+ }
+
+ @Override
+ public void reset(int rlen, int[] odims, double v) {
+ if(rlen > capacity() / _odims[0])
+ _data = new double[rlen][];
+ else {
+ if(v == 0.0) {
+ for(int i = 0; i < rlen; i++)
+ _data[i] = null;
+ }
+ else {
+ for(int i = 0; i < rlen; i++) {
+ if(odims[0] > _odims[0] ||_data[i] ==
null )
+ allocateBlock(i, odims[0]);
+ Arrays.fill(_data[i], 0, odims[0], v);
+ }
+ }
+ }
+ _rlen = rlen;
+ _odims = odims;
+ }
+
+ @Override
+ public void resetNoFill(int rlen, int[] odims) {
+ if(_data == null || rlen > _rlen){
+ _data = new double[rlen][];
+ }
+ _rlen = rlen;
+ _odims = odims;
+ }
+
+ @Override
+ public boolean isNumeric() {
+ return true;
+ }
+
+ @Override
+ public boolean isNumeric(Types.ValueType vt) {
+ return Types.ValueType.FP64 == vt;
+ }
+
+ @Override
+ public long capacity() {
+ return (_data != null) ? ((long) _data.length)*_odims[0] : -1;
+ }
+
+ @Override
+ public long countNonZeros() {
+ long nnz = 0;
+ HashMap<double[], Long> cache = new HashMap<>();
+ for (int i = 0; i < _rlen; i++) {
+ double[] row = this._data[i];
+ if(row == null)
+ continue;
+ Long count = cache.getOrDefault(row, null);
+ if(count == null){
+ count = Long.valueOf(countNonZeros(i));
+ cache.put(row, count);
+ }
+ nnz += count;
+ }
+ this._distinct = cache.size();
+ return nnz;
+ }
+
+ @Override
+ public int countNonZeros(int r) {
+ return _data[r] == null ? 0 :
UtilFunctions.computeNnz(_data[r], 0, _odims[0]);
+ }
+
+ @Override
+ public long countNonZeros(int rl, int ru, int ol, int ou) {
+ long nnz = 0;
+ HashMap<double[], Long> cache = new HashMap<>();
+ for (int i = rl; i < ru; i++) {
+ double[] row = this._data[i];
+ if(row == null)
+ continue;
+ Long count = cache.getOrDefault(row, null);
+ if(count == null){
+ count =
Long.valueOf(UtilFunctions.computeNnz(_data[i], ol, ou));
+ cache.put(row, count);
+ }
+ nnz += count;
+ }
+ return nnz;
+ }
+
+ @Override
+ protected long computeNnz(int bix, int start, int length) {
+ int nnz = 0;
+ int row_start = (int) Math.floor(start / _odims[0]);
+ int col_start = start % _odims[0];
+ for (int i = 0; i < length; i++) {
+ if(_data[row_start] == null){
+ i += _odims[0] - 1 - col_start;
+ col_start = 0;
+ row_start += 1;
+ continue;
+ }
+ nnz += _data[row_start][col_start] != 0 ? 1 : 0;
+ col_start += 1;
+ if(col_start == _odims[0]) {
+ col_start = 0;
+ row_start += 1;
+ }
+ }
+ return nnz;
+ }
+
+ @Override
+ public int pos(int r){
+ return 0;
+ }
+
+ @Override
+ public int pos(int r, int c){
+ return c;
+ }
+
+ @Override
+ public int pos(int[] ix){
+ int pos = ix[ix.length - 1];
+ for(int i = 1; i < ix.length - 1; i++)
+ pos += ix[i] * _odims[i];
+ return pos;
+ }
+
+ @Override
+ public int blockSize(int bix) {
+ return 1;
+ }
+ public boolean isContiguous(int rl, int ru){
+ return rl == ru;
+ }
+ @Override
+ public double[] values(int r) {
+ return valuesAt(r);
+ }
+
+ @Override
+ public double[] valuesAt(int bix) {
+ return _data[bix] == null ? new double[_odims[0]] : _data[bix];
+ }
+
+ @Override
+ public int index(int r) {
+ return r;
+ }
+
+ @Override
+ public int numBlocks(){
+ return _data.length;
+ }
+
+ @Override
+ public int size(int bix) {
+ return _odims[0];
+ }
+
+ @Override
+ public void incr(int r, int c) {
+ incr(r,c,1.0);
+ }
+
+ @Override
+ public void incr(int r, int c, double delta) {
+ if(_data[r] == null)
+ allocateBlock(r, _odims[0]);
+ _data[r][c] += delta;
+ }
+
+ @Override
+ protected void fillBlock(int bix, int fromIndex, int toIndex, double v)
{
+ if(_data[bix] == null)
+ allocateBlock(bix, _odims[0]);
+ Arrays.fill(_data[bix], fromIndex, toIndex, v);
+ }
+
+ @Override
+ protected void setInternal(int bix, int ix, double v) {
+ set(bix, ix, v);
+ }
+
+ @Override
+ public DenseBlock set(int r, int c, double v) {
+ if(_data[r] == null)
+ _data[r] = new double[_odims[0]];
+ _data[r][c] = v;
+ return this;
+ }
+
+ @Override
+ public DenseBlock set(int r, double[] v) {
+ if(v.length == _odims[0])
+ _data[r] = v;
+ else
+ throw new RuntimeException("set Denseblock called with
an array length [" + v.length +"], array to overwrite is of length [" +
_odims[0] + "]");
+ return this;
+ }
+
+ @Override
+ public DenseBlock set(DenseBlock db) {
+ throw new NotImplementedException();
+ }
+
+ @Override
+ public DenseBlock set(int[] ix, double v) {
+ return set(ix[0], pos(ix), v);
+ }
+
+ @Override
+ public DenseBlock set(int[] ix, long v) {
+ return set(ix[0], pos(ix), v);
+ }
+
+ @Override
+ public DenseBlock set(int[] ix, String v) {
+ return set(ix[0], pos(ix), Double.parseDouble(v));
+ }
+
+ @Override
+ public double get(int r, int c) {
+ if(_data[r] == null)
+ return 0.0;
+ else
+ return _data[r][c];
+ }
+
+ @Override
+ public double get(int[] ix) {
+ return get(ix[0], pos(ix));
+ }
+
+ @Override
+ public String getString(int[] ix) {
+ return String.valueOf(get(ix[0], pos(ix)));
+ }
+
+ @Override
+ public long getLong(int[] ix) {
+ return UtilFunctions.toLong(get(ix[0], pos(ix)));
+ }
+
+ public double estimateMemory(){
+ if( (double)_rlen + this._odims[0] > Long.MAX_VALUE )
+ return Long.MAX_VALUE;
+ return DenseBlock.estimateMemory(_rlen, _odims[0])
+ +
MemoryEstimates.doubleArrayCost(_odims[0])*_distinct +
MemoryEstimates.objectArrayCost(_rlen);
+ }
}
diff --git
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
index 92f4ee4c31..b0de2460d9 100644
---
a/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
+++
b/src/main/java/org/apache/sysds/runtime/frame/data/columns/ArrayFactory.java
@@ -75,7 +75,7 @@ public interface ArrayFactory {
}
public static <T> RaggedArray<T> create(T[] col, int m) {
- return new RaggedArray<T>(col, m);
+ return new RaggedArray<>(col, m);
}
public static long getInMemorySize(ValueType type, int _numRows,
boolean containsNull) {
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/CudaMemoryAllocator.java
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/CudaMemoryAllocator.java
index 805bc8ea66..36120eccbd 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/CudaMemoryAllocator.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/gpu/context/CudaMemoryAllocator.java
@@ -47,6 +47,7 @@ public class CudaMemoryAllocator implements
GPUMemoryAllocator {
@Override
public void allocate(Pointer devPtr, long size) {
try {
+ @SuppressWarnings("unused")
int status = cudaMalloc(devPtr, size);
}
catch(CudaException e) {
diff --git
a/src/main/java/org/apache/sysds/runtime/instructions/spark/data/PartitionedBroadcast.java
b/src/main/java/org/apache/sysds/runtime/instructions/spark/data/PartitionedBroadcast.java
index bf2c5356b5..91e2f2a756 100644
---
a/src/main/java/org/apache/sysds/runtime/instructions/spark/data/PartitionedBroadcast.java
+++
b/src/main/java/org/apache/sysds/runtime/instructions/spark/data/PartitionedBroadcast.java
@@ -23,7 +23,6 @@ import java.io.Serializable;
import java.util.ArrayList;
import org.apache.spark.broadcast.Broadcast;
-import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.controlprogram.caching.CacheBlock;
import org.apache.sysds.runtime.controlprogram.caching.CacheBlockFactory;
import org.apache.sysds.runtime.controlprogram.context.SparkExecutionContext;
diff --git
a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
index 54ac8d2e22..51a26f3539 100644
--- a/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
+++ b/src/main/java/org/apache/sysds/runtime/matrix/data/LibMatrixMult.java
@@ -265,7 +265,7 @@ public class LibMatrixMult
ArrayList<MatrixMultTask> tasks = new ArrayList<>();
ArrayList<Integer> blklens =
UtilFunctions.getBalancedBlockSizesDefault(num, k,
(pm2r || pm2c || ret.denseBlock instanceof
DenseBlockFP64DEDUP));
- ConcurrentHashMap<double[], double[]> cache =
m1.denseBlock instanceof DenseBlockFP64DEDUP ? new ConcurrentHashMap(): null;
+ ConcurrentHashMap<double[], double[]> cache =
m1.denseBlock instanceof DenseBlockFP64DEDUP ? new ConcurrentHashMap<>(): null;
for(int i = 0, lb = 0; i < blklens.size(); lb +=
blklens.get(i), i++)
tasks.add(new MatrixMultTask(m1, m2, ret, tm2,
pm2r, pm2c, m1Perm, sparse, lb, lb + blklens.get(i), cache));
// execute tasks
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/CompressibleInputGenerator.java
b/src/test/java/org/apache/sysds/test/component/compress/CompressibleInputGenerator.java
index 951dff90c3..0e42a7fb59 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/CompressibleInputGenerator.java
+++
b/src/test/java/org/apache/sysds/test/component/compress/CompressibleInputGenerator.java
@@ -195,17 +195,17 @@ public class CompressibleInputGenerator {
if(r.nextDouble() < sparsity) {
if(transpose &&
output.isInSparseFormat()) {
int v = (int)
(output.getValue(0, x) * y);
- double d = Math.abs(v % ((int)
(diff))) + min;
+ double d = Math.abs(v % diff) +
min;
output.appendValue(y, x, d);
}
else if(transpose) {
int v = (int)
(output.getValue(0, x) * y);
- double d = Math.abs(v % ((int)
(diff))) + min;
+ double d = Math.abs(v % diff) +
min;
output.quickSetValue(y, x, d);
}
else {
int v = (int)
(output.getValue(x, 0) * y);
- double d = Math.abs(v % ((int)
(diff))) + min;
+ double d = Math.abs(v % diff) +
min;
output.quickSetValue(x, y, d);
}
}
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/io/IOSpark.java
b/src/test/java/org/apache/sysds/test/component/compress/io/IOSpark.java
index 436ba9d1e2..5bfdacf42f 100644
--- a/src/test/java/org/apache/sysds/test/component/compress/io/IOSpark.java
+++ b/src/test/java/org/apache/sysds/test/component/compress/io/IOSpark.java
@@ -287,7 +287,7 @@ public class IOSpark {
// Read in again as RDD
JavaPairRDD<MatrixIndexes, MatrixBlock> m = getRDD(f1);
MatrixReader r = ReaderCompressed.create();
- MatrixBlock mb2 = r.readMatrixFromHDFS(f1, (long)
mb.getNumRows(), (long) mb.getNumColumns(), blen1, -1L);
+ MatrixBlock mb2 = r.readMatrixFromHDFS(f1,
mb.getNumRows(), mb.getNumColumns(), blen1, -1L);
TestUtils.compareMatricesBitAvgDistance(mb, mb2, 0, 0);
String f2 = getName(); // get new name for writing RDD.
// Write RDD to disk
@@ -403,7 +403,7 @@ public class IOSpark {
WriterCompressed.writeCompressedMatrixToHDFS(mb, n,
blen);
Thread.sleep(100);
MatrixReader r = ReaderCompressed.create();
- MatrixBlock mb2 = r.readMatrixFromHDFS(n, (long)
mb.getNumRows(), (long) mb.getNumColumns(), blen, -1L);
+ MatrixBlock mb2 = r.readMatrixFromHDFS(n,
mb.getNumRows(), mb.getNumColumns(), blen, -1L);
TestUtils.compareMatricesBitAvgDistance(mb, mb2, 0, 0);
SparkExecutionContext ec =
ExecutionContextFactory.createSparkExecutionContext();
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibSliceTest.java
b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibSliceTest.java
index 7e93e81d01..f3c2ffa6be 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibSliceTest.java
+++
b/src/test/java/org/apache/sysds/test/component/compress/lib/CLALibSliceTest.java
@@ -35,99 +35,99 @@ import
org.apache.sysds.runtime.compress.colgroup.indexes.IColIndex;
import org.junit.Test;
public class CLALibSliceTest {
- protected static final Log LOG =
LogFactory.getLog(CLALibSliceTest.class.getName());
-
- @Test
- public void sliceColumnsRanges() {
- List<AColGroup> gs = new ArrayList<AColGroup>();
- for(int i = 0; i < 10; i++) {
- gs.add(new ColGroupEmpty(ColIndexFactory.create(i * 10, i * 10 +
10)));
- }
-
- CompressedMatrixBlock cmb = new CompressedMatrixBlock(100, 100, -1,
false, gs);
-
- CompressedMatrixBlock cmb2 = (CompressedMatrixBlock) cmb.slice(0, 99,
5, 49);
- assertEquals(49 - 4, cmb2.getNumColumns());
- assertEquals(5, cmb2.getColGroups().size());
-
- int countColumns = 0;
- for(AColGroup g : cmb2.getColGroups()) {
- IColIndex idx = g.getColIndices();
- countColumns += idx.size();
- assertTrue(idx.get(0) >= 0);
- assertTrue(idx.get(idx.size() - 1) < cmb.getNumColumns());
- }
- assertEquals(cmb2.getNumColumns(), countColumns);
- }
-
- @Test
- public void sliceSingleColumns() {
- List<AColGroup> gs = new ArrayList<AColGroup>();
- for(int i = 0; i < 50; i++) {
- gs.add(new ColGroupEmpty(ColIndexFactory.create(i, i + 1)));
- }
-
- CompressedMatrixBlock cmb = new CompressedMatrixBlock(100, 50, -1,
false, gs);
-
- CompressedMatrixBlock cmb2 = (CompressedMatrixBlock) cmb.slice(0, 99,
5, 40);
- assertEquals(40 - 4, cmb2.getNumColumns());
- assertEquals(40 - 4, cmb2.getColGroups().size());
-
- int countColumns = 0;
- for(AColGroup g : cmb2.getColGroups()) {
- IColIndex idx = g.getColIndices();
- countColumns += idx.size();
- assertTrue(idx.get(0) >= 0);
- assertTrue(idx.get(idx.size() - 1) < cmb.getNumColumns());
- }
- assertEquals(cmb2.getNumColumns(), countColumns);
- }
-
- @Test
- public void sliceTwoColumns() {
- List<AColGroup> gs = new ArrayList<AColGroup>();
- for(int i = 0; i < 50; i+=2) {
- gs.add(new ColGroupEmpty(ColIndexFactory.createI(i, i +1)));
- }
-
- CompressedMatrixBlock cmb = new CompressedMatrixBlock(100, 50, -1,
false, gs);
-
- CompressedMatrixBlock cmb2 = (CompressedMatrixBlock) cmb.slice(0, 99,
5, 40);
- assertEquals(40 - 4, cmb2.getNumColumns());
- assertEquals((40 - 4) /2 + 1, cmb2.getColGroups().size());
-
- int countColumns = 0;
- for(AColGroup g : cmb2.getColGroups()) {
- IColIndex idx = g.getColIndices();
- countColumns += idx.size();
- assertTrue(idx.get(0) >= 0);
- assertTrue(idx.get(idx.size() - 1) < cmb.getNumColumns());
- }
- assertEquals(cmb2.getNumColumns(), countColumns);
- }
-
-
- @Test
- public void sliceTwoColumnsV2() {
- List<AColGroup> gs = new ArrayList<AColGroup>();
- gs.add(new ColGroupEmpty(ColIndexFactory.createI(0)));
- for(int i = 1; i < 51; i+=2) {
- gs.add(new ColGroupEmpty(ColIndexFactory.createI(i, i +1)));
- }
-
- CompressedMatrixBlock cmb = new CompressedMatrixBlock(100, 51, -1,
false, gs);
-
- CompressedMatrixBlock cmb2 = (CompressedMatrixBlock) cmb.slice(0, 99,
5, 40);
- assertEquals(40 - 4, cmb2.getNumColumns());
- assertEquals(18, cmb2.getColGroups().size());
-
- int countColumns = 0;
- for(AColGroup g : cmb2.getColGroups()) {
- IColIndex idx = g.getColIndices();
- countColumns += idx.size();
- assertTrue(idx.get(0) >= 0);
- assertTrue(idx.get(idx.size() - 1) < cmb.getNumColumns());
- }
- assertEquals(cmb2.getNumColumns(), countColumns);
- }
+ protected static final Log LOG =
LogFactory.getLog(CLALibSliceTest.class.getName());
+
+ @Test
+ public void sliceColumnsRanges() {
+ List<AColGroup> gs = new ArrayList<>();
+ for(int i = 0; i < 10; i++) {
+ gs.add(new ColGroupEmpty(ColIndexFactory.create(i * 10,
i * 10 + 10)));
+ }
+
+ CompressedMatrixBlock cmb = new CompressedMatrixBlock(100, 100,
-1, false, gs);
+
+ CompressedMatrixBlock cmb2 = (CompressedMatrixBlock)
cmb.slice(0, 99, 5, 49);
+ assertEquals(49 - 4, cmb2.getNumColumns());
+ assertEquals(5, cmb2.getColGroups().size());
+
+ int countColumns = 0;
+ for(AColGroup g : cmb2.getColGroups()) {
+ IColIndex idx = g.getColIndices();
+ countColumns += idx.size();
+ assertTrue(idx.get(0) >= 0);
+ assertTrue(idx.get(idx.size() - 1) <
cmb.getNumColumns());
+ }
+ assertEquals(cmb2.getNumColumns(), countColumns);
+ }
+
+ @Test
+ public void sliceSingleColumns() {
+ List<AColGroup> gs = new ArrayList<>();
+ for(int i = 0; i < 50; i++) {
+ gs.add(new ColGroupEmpty(ColIndexFactory.create(i, i +
1)));
+ }
+
+ CompressedMatrixBlock cmb = new CompressedMatrixBlock(100, 50,
-1, false, gs);
+
+ CompressedMatrixBlock cmb2 = (CompressedMatrixBlock)
cmb.slice(0, 99, 5, 40);
+ assertEquals(40 - 4, cmb2.getNumColumns());
+ assertEquals(40 - 4, cmb2.getColGroups().size());
+
+ int countColumns = 0;
+ for(AColGroup g : cmb2.getColGroups()) {
+ IColIndex idx = g.getColIndices();
+ countColumns += idx.size();
+ assertTrue(idx.get(0) >= 0);
+ assertTrue(idx.get(idx.size() - 1) <
cmb.getNumColumns());
+ }
+ assertEquals(cmb2.getNumColumns(), countColumns);
+ }
+
+ @Test
+ public void sliceTwoColumns() {
+ List<AColGroup> gs = new ArrayList<>();
+ for(int i = 0; i < 50; i+=2) {
+ gs.add(new ColGroupEmpty(ColIndexFactory.createI(i, i
+1)));
+ }
+
+ CompressedMatrixBlock cmb = new CompressedMatrixBlock(100, 50,
-1, false, gs);
+
+ CompressedMatrixBlock cmb2 = (CompressedMatrixBlock)
cmb.slice(0, 99, 5, 40);
+ assertEquals(40 - 4, cmb2.getNumColumns());
+ assertEquals((40 - 4) /2 + 1, cmb2.getColGroups().size());
+
+ int countColumns = 0;
+ for(AColGroup g : cmb2.getColGroups()) {
+ IColIndex idx = g.getColIndices();
+ countColumns += idx.size();
+ assertTrue(idx.get(0) >= 0);
+ assertTrue(idx.get(idx.size() - 1) <
cmb.getNumColumns());
+ }
+ assertEquals(cmb2.getNumColumns(), countColumns);
+ }
+
+
+ @Test
+ public void sliceTwoColumnsV2() {
+ List<AColGroup> gs = new ArrayList<>();
+ gs.add(new ColGroupEmpty(ColIndexFactory.createI(0)));
+ for(int i = 1; i < 51; i+=2) {
+ gs.add(new ColGroupEmpty(ColIndexFactory.createI(i, i
+1)));
+ }
+
+ CompressedMatrixBlock cmb = new CompressedMatrixBlock(100, 51,
-1, false, gs);
+
+ CompressedMatrixBlock cmb2 = (CompressedMatrixBlock)
cmb.slice(0, 99, 5, 40);
+ assertEquals(40 - 4, cmb2.getNumColumns());
+ assertEquals(18, cmb2.getColGroups().size());
+
+ int countColumns = 0;
+ for(AColGroup g : cmb2.getColGroups()) {
+ IColIndex idx = g.getColIndices();
+ countColumns += idx.size();
+ assertTrue(idx.get(0) >= 0);
+ assertTrue(idx.get(idx.size() - 1) <
cmb.getNumColumns());
+ }
+ assertEquals(cmb2.getNumColumns(), countColumns);
+ }
}
diff --git
a/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java
b/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java
index fecaf28d07..02a6a94e8f 100644
---
a/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java
+++
b/src/test/java/org/apache/sysds/test/component/compress/util/CountMapTest.java
@@ -146,7 +146,7 @@ public class CountMapTest {
Arrays.sort(vals, Comparator.comparing((x) -> x.key()));
for(int i = 0; i < 100; i++) {
assertEquals(1, vals[i].count);
- assertEquals((double) i, vals[i].key(), 0.0);
+ assertEquals(i, vals[i].key(), 0.0);
}
}
@@ -160,7 +160,7 @@ public class CountMapTest {
Arrays.sort(vals, Comparator.comparing((x) -> x.key()));
for(int i = 0; i < 100; i++) {
assertEquals(1, vals[i].count);
- assertEquals((double) i, vals[i].key(), 0.0);
+ assertEquals(i, vals[i].key(), 0.0);
}
}
@@ -206,15 +206,12 @@ public class CountMapTest {
@Test
public void getDictionary() {
- if(!(m instanceof DoubleCountHashMap))
- return;
-
for(int i = 0; i < 9; i++)
m.increment((double) i);
- double[] d = ((DoubleCountHashMap) m).getDictionary();
+ double[] d = m.getDictionary();
for(int i = 0; i < 9; i++)
- assertEquals((double) i, d[i], 0.0);
+ assertEquals(i, d[i], 0.0);
}
@Test()
diff --git
a/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java
b/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java
index be1e6a1ac2..38858dfb93 100644
--- a/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java
+++ b/src/test/java/org/apache/sysds/test/functions/io/binary/SerializeTest.java
@@ -20,8 +20,6 @@
package org.apache.sysds.test.functions.io.binary;
import com.google.crypto.tink.subtle.Random;
-import org.apache.sysds.runtime.controlprogram.caching.ByteBuffer;
-import org.apache.sysds.runtime.util.FastBufferedDataOutputStream;
import org.apache.sysds.runtime.util.LocalFileUtils;
import org.junit.Assert;
import org.junit.Test;
@@ -35,9 +33,7 @@ import org.apache.sysds.test.AutomatedTestBase;
import org.apache.sysds.test.TestConfiguration;
import org.apache.sysds.test.TestUtils;
-import java.io.FileOutputStream;
import java.util.HashMap;
-import java.util.HashSet;
public class SerializeTest extends AutomatedTestBase
{
diff --git
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeMultithreadedTest.java
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeMultithreadedTest.java
index 4ad09555e8..53990025b0 100644
---
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeMultithreadedTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeMultithreadedTest.java
@@ -42,8 +42,8 @@ import org.junit.Test;
public class TransformFrameEncodeMultithreadedTest extends AutomatedTestBase {
private final static String TEST_NAME1 =
"TransformFrameEncodeMultithreadedTest";
private final static String TEST_DIR = "functions/transform/";
- private final static String TEST_CLASS_DIR = TEST_DIR +
TransformFrameEncodeMultithreadedTest.class.getSimpleName()
- + "/";
+ private final static String TEST_CLASS_DIR = TEST_DIR +
+ TransformFrameEncodeMultithreadedTest.class.getSimpleName() +
"/";
// Datasets and transform tasks without missing values
private final static String DATASET1 = "homes3/homes.csv";
diff --git
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java
index a69e287d33..1e5a73e4ff 100644
---
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java
+++
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding1Test.java
@@ -39,114 +39,114 @@ import java.util.Random;
public class TransformFrameEncodeWordEmbedding1Test extends AutomatedTestBase
{
- private final static String TEST_NAME1 =
"TransformFrameEncodeWordEmbeddings";
- private final static String TEST_DIR = "functions/transform/";
- private final static String TEST_CLASS_DIR = TEST_DIR +
TransformFrameEncodeWordEmbedding1Test.class.getSimpleName() + "/";
-
- @Override
- public void setUp() {
- TestUtils.clearAssertionInformation();
- addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_CLASS_DIR,
TEST_NAME1));
- }
-
- @Test
- public void testTransformToWordEmbeddings() {
- runTransformTest(TEST_NAME1, ExecMode.SINGLE_NODE);
- }
-
- private void runTransformTest(String testname, ExecMode rt)
- {
- //set runtime platform
- ExecMode rtold = setExecMode(rt);
- try
- {
- int rows = 100;
- int cols = 300;
- getAndLoadTestConfiguration(testname);
- fullDMLScriptName = getScript();
-
- // Generate random embeddings for the distinct tokens
- double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10,
1, new Date().getTime());
- // Generate random distinct tokens
- List<String> strings = generateRandomStrings(rows, 10);
- // Generate the dictionary by assigning unique ID to each distinct
token
- Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
- // Create the dataset by repeating and shuffling the distinct
tokens
- List<String> stringsColumn = shuffleAndMultiplyStrings(strings,
32);
- writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR +
"data");
-
- programArgs = new String[]{"-stats","-args", input("embeddings"),
input("data"), input("dict"), output("result")};
- runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
-
- // Manually derive the expected result
- double[][] res_expected = new double[stringsColumn.size()][cols];
- for (int i = 0; i < stringsColumn.size(); i++) {
- int rowMapped = map.get(stringsColumn.get(i));
- System.arraycopy(a[rowMapped], 0, res_expected[i], 0, cols);
- }
-
- // Compare results
- HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
-
TestUtils.compareMatrices(TestUtils.convertHashMapToDoubleArray(res_actual),
res_expected, 1e-6);
- }
- catch(Exception ex) {
- throw new RuntimeException(ex);
- }
- finally {
- resetExecMode(rtold);
- }
- }
-
- public static List<String> shuffleAndMultiplyStrings(List<String> strings,
int multiply){
- List<String> out = new ArrayList<>();
- Random random = new Random();
- for (int i = 0; i < strings.size()*multiply; i++) {
- out.add(strings.get(random.nextInt(strings.size())));
- }
- return out;
- }
-
- public static List<String> generateRandomStrings(int numStrings, int
stringLength) {
- List<String> randomStrings = new ArrayList<>();
- Random random = new Random();
- String characters =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
- for (int i = 0; i < numStrings; i++) {
- randomStrings.add(generateRandomString(random, stringLength,
characters));
- }
- return randomStrings;
- }
-
- public static String generateRandomString(Random random, int stringLength,
String characters){
- StringBuilder randomString = new StringBuilder();
- for (int j = 0; j < stringLength; j++) {
- int randomIndex = random.nextInt(characters.length());
- randomString.append(characters.charAt(randomIndex));
- }
- return randomString.toString();
- }
-
- public static void writeStringsToCsvFile(List<String> strings, String
fileName) {
- try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName)))
{
- for (String line : strings) {
- bw.write(line);
- bw.newLine();
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- public static Map<String,Integer> writeDictToCsvFile(List<String> strings,
String fileName) {
- try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName)))
{
- Map<String,Integer> map = new HashMap<>();
- for (int i = 0; i < strings.size(); i++) {
- map.put(strings.get(i), i);
- bw.write(strings.get(i) + Lop.DATATYPE_PREFIX + (i+1) + "\n");
- }
- return map;
- } catch (IOException e) {
- e.printStackTrace();
- return null;
- }
- }
+ private final static String TEST_NAME1 =
"TransformFrameEncodeWordEmbeddings";
+ private final static String TEST_DIR = "functions/transform/";
+ private final static String TEST_CLASS_DIR = TEST_DIR +
TransformFrameEncodeWordEmbedding1Test.class.getSimpleName() + "/";
+
+ @Override
+ public void setUp() {
+ TestUtils.clearAssertionInformation();
+ addTestConfiguration(TEST_NAME1, new
TestConfiguration(TEST_CLASS_DIR, TEST_NAME1));
+ }
+
+ @Test
+ public void testTransformToWordEmbeddings() {
+ runTransformTest(TEST_NAME1, ExecMode.SINGLE_NODE);
+ }
+
+ private void runTransformTest(String testname, ExecMode rt)
+ {
+ //set runtime platform
+ ExecMode rtold = setExecMode(rt);
+ try
+ {
+ int rows = 100;
+ int cols = 300;
+ getAndLoadTestConfiguration(testname);
+ fullDMLScriptName = getScript();
+
+ // Generate random embeddings for the distinct tokens
+ double[][] a = createRandomMatrix("embeddings", rows,
cols, 0, 10, 1, new Date().getTime());
+ // Generate random distinct tokens
+ List<String> strings = generateRandomStrings(rows, 10);
+ // Generate the dictionary by assigning unique ID to
each distinct token
+ Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
+ // Create the dataset by repeating and shuffling the
distinct tokens
+ List<String> stringsColumn =
shuffleAndMultiplyStrings(strings, 32);
+ writeStringsToCsvFile(stringsColumn, baseDirectory +
INPUT_DIR + "data");
+
+ programArgs = new String[]{"-stats","-args",
input("embeddings"), input("data"), input("dict"), output("result")};
+ runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+ // Manually derive the expected result
+ double[][] res_expected = new
double[stringsColumn.size()][cols];
+ for (int i = 0; i < stringsColumn.size(); i++) {
+ int rowMapped = map.get(stringsColumn.get(i));
+ System.arraycopy(a[rowMapped], 0,
res_expected[i], 0, cols);
+ }
+
+ // Compare results
+ HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
+
TestUtils.compareMatrices(TestUtils.convertHashMapToDoubleArray(res_actual),
res_expected, 1e-6);
+ }
+ catch(Exception ex) {
+ throw new RuntimeException(ex);
+ }
+ finally {
+ resetExecMode(rtold);
+ }
+ }
+
+ public static List<String> shuffleAndMultiplyStrings(List<String>
strings, int multiply){
+ List<String> out = new ArrayList<>();
+ Random random = new Random();
+ for (int i = 0; i < strings.size()*multiply; i++) {
+ out.add(strings.get(random.nextInt(strings.size())));
+ }
+ return out;
+ }
+
+ public static List<String> generateRandomStrings(int numStrings, int
stringLength) {
+ List<String> randomStrings = new ArrayList<>();
+ Random random = new Random();
+ String characters =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+ for (int i = 0; i < numStrings; i++) {
+ randomStrings.add(generateRandomString(random,
stringLength, characters));
+ }
+ return randomStrings;
+ }
+
+ public static String generateRandomString(Random random, int
stringLength, String characters){
+ StringBuilder randomString = new StringBuilder();
+ for (int j = 0; j < stringLength; j++) {
+ int randomIndex = random.nextInt(characters.length());
+ randomString.append(characters.charAt(randomIndex));
+ }
+ return randomString.toString();
+ }
+
+ public static void writeStringsToCsvFile(List<String> strings, String
fileName) {
+ try (BufferedWriter bw = new BufferedWriter(new
FileWriter(fileName))) {
+ for (String line : strings) {
+ bw.write(line);
+ bw.newLine();
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ public static Map<String,Integer> writeDictToCsvFile(List<String>
strings, String fileName) {
+ try (BufferedWriter bw = new BufferedWriter(new
FileWriter(fileName))) {
+ Map<String,Integer> map = new HashMap<>();
+ for (int i = 0; i < strings.size(); i++) {
+ map.put(strings.get(i), i);
+ bw.write(strings.get(i) + Lop.DATATYPE_PREFIX +
(i+1) + "\n");
+ }
+ return map;
+ } catch (IOException e) {
+ e.printStackTrace();
+ return null;
+ }
+ }
}
diff --git
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java
index 6fb9f511ea..a4ef0fbab9 100644
---
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java
+++
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java
@@ -41,260 +41,263 @@ import java.util.Random;
public class TransformFrameEncodeWordEmbedding2Test extends AutomatedTestBase
{
- private final static String TEST_NAME1 =
"TransformFrameEncodeWordEmbeddings2";
- private final static String TEST_NAME2a =
"TransformFrameEncodeWordEmbeddings2MultiCols1";
- private final static String TEST_NAME2b =
"TransformFrameEncodeWordEmbeddings2MultiCols2";
-
- private final static String TEST_DIR = "functions/transform/";
- private final static String TEST_CLASS_DIR = TEST_DIR +
TransformFrameEncodeWordEmbedding1Test.class.getSimpleName() + "/";
-
- @Override
- public void setUp() {
- TestUtils.clearAssertionInformation();
- addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_DIR,
TEST_NAME1));
- addTestConfiguration(TEST_NAME2a, new TestConfiguration(TEST_DIR,
TEST_NAME2a));
- addTestConfiguration(TEST_NAME2b, new TestConfiguration(TEST_DIR,
TEST_NAME2b));
- }
-
- @Test
- public void testTransformToWordEmbeddings() {
- runTransformTest(TEST_NAME1, ExecMode.SINGLE_NODE);
- }
-
- @Test
- @Ignore
- public void testNonRandomTransformToWordEmbeddings2Cols() {
- runTransformTest(TEST_NAME2a, ExecMode.SINGLE_NODE);
- }
-
- @Test
- @Ignore
- public void testRandomTransformToWordEmbeddings4Cols() {
- runTransformTestMultiCols(TEST_NAME2b, ExecMode.SINGLE_NODE);
- }
-
- @Test
- @Ignore
- public void runBenchmark(){
- runBenchmark(TEST_NAME1, ExecMode.SINGLE_NODE);
- }
-
-
-
-
- private void runBenchmark(String testname, ExecMode rt)
- {
- //set runtime platform
- ExecMode rtold = setExecMode(rt);
- try
- {
- int rows = 100;
- int cols = 300;
- getAndLoadTestConfiguration(testname);
- fullDMLScriptName = getScript();
-
- // Generate random embeddings for the distinct tokens
- double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10,
1, new Date().getTime());
-
- // Generate random distinct tokens
- List<String> strings = generateRandomStrings(rows, 10);
-
- // Generate the dictionary by assigning unique ID to each distinct
token
- Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
-
- // Create the dataset by repeating and shuffling the distinct
tokens
- List<String> stringsColumn = shuffleAndMultiplyStrings(strings,
320);
- writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR +
"data");
-
- //run script
- programArgs = new String[]{"-stats","-args", input("embeddings"),
input("data"), input("dict"), output("result")};
- runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
- }
- catch(Exception ex) {
- throw new RuntimeException(ex);
-
- }
- finally {
- resetExecMode(rtold);
- }
- }
-
- private void runTransformTest(String testname, ExecMode rt)
- {
- //set runtime platform
- ExecMode rtold = setExecMode(rt);
- try
- {
- int rows = 100;
- int cols = 300;
- getAndLoadTestConfiguration(testname);
- fullDMLScriptName = getScript();
-
- // Generate random embeddings for the distinct tokens
- double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10,
1, new Date().getTime());
-
- // Generate random distinct tokens
- List<String> strings = generateRandomStrings(rows, 10);
-
- // Generate the dictionary by assigning unique ID to each distinct
token
- Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
-
- // Create the dataset by repeating and shuffling the distinct
tokens
- List<String> stringsColumn = shuffleAndMultiplyStrings(strings,
320);
- writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR +
"data");
-
- //run script
- programArgs = new String[]{"-stats","-args", input("embeddings"),
input("data"), input("dict"), output("result")};
- runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
-
- // Manually derive the expected result
- double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a,
map, stringsColumn);
-
- // Compare results
- HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
- double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
- TestUtils.compareMatrices(resultActualDouble, res_expected, 1e-6);
- }
- catch(Exception ex) {
- throw new RuntimeException(ex);
-
- }
- finally {
- resetExecMode(rtold);
- }
- }
-
- public static void print2DimDoubleArray(double[][] resultActualDouble) {
- Arrays.stream(resultActualDouble).forEach(
- e -> System.out.println(Arrays.stream(e).mapToObj(d ->
String.format("%06.1f", d))
- .reduce("", (sub, elem) -> sub + " " + elem)));
- }
-
- private void runTransformTestMultiCols(String testname, ExecMode rt)
- {
- //set runtime platform
- ExecMode rtold = setExecMode(rt);
- try
- {
- int rows = 100;
- int cols = 100;
- getAndLoadTestConfiguration(testname);
- fullDMLScriptName = getScript();
-
- // Generate random embeddings for the distinct tokens
- double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10,
1, new Date().getTime());
-
- // Generate random distinct tokens
- List<String> strings = generateRandomStrings(rows, 10);
-
- // Generate the dictionary by assigning unique ID to each distinct
token
- Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
-
- // Create the dataset by repeating and shuffling the distinct
tokens
- List<String> stringsColumn = shuffleAndMultiplyStrings(strings,
10);
- writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR +
"data");
-
- //run script
- programArgs = new String[]{"-stats","-args", input("embeddings"),
input("data"), input("dict"), output("result"), output("result2")};
- runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
-
- // Manually derive the expected result
- double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a,
map, stringsColumn);
-
- // Compare results
- HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
- HashMap<MatrixValue.CellIndex, Double> res_actual2 =
readDMLMatrixFromOutputDir("result2");
- double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
- double[][] resultActualDouble2 =
TestUtils.convertHashMapToDoubleArray(res_actual2);
- //System.out.println("Actual Result1 [" +
resultActualDouble.length + "x" + resultActualDouble[0].length + "]:");
- print2DimDoubleArray(resultActualDouble);
- //System.out.println("\nActual Result2 [" +
resultActualDouble.length + "x" + resultActualDouble[0].length + "]:");
- //print2DimDoubleArray(resultActualDouble2);
- //System.out.println("\nExpected Result [" + res_expected.length +
"x" + res_expected[0].length + "]:");
- //print2DimDoubleArray(res_expected);
- TestUtils.compareMatrices(resultActualDouble, res_expected, 1e-6);
- TestUtils.compareMatrices(resultActualDouble, resultActualDouble2,
1e-6);
- }
- catch(Exception ex) {
- throw new RuntimeException(ex);
-
- }
- finally {
- resetExecMode(rtold);
- }
- }
-
- public static double[][] manuallyDeriveWordEmbeddings(int cols, double[][]
a, Map<String, Integer> map, List<String> stringsColumn) {
- // Manually derive the expected result
- double[][] res_expected = new double[stringsColumn.size()][cols];
- for (int i = 0; i < stringsColumn.size(); i++) {
- int rowMapped = map.get(stringsColumn.get(i));
- System.arraycopy(a[rowMapped], 0, res_expected[i], 0, cols);
- }
- return res_expected;
- }
-
- private double[][] generateWordEmbeddings(int rows, int cols) {
- double[][] a = new double[rows][cols];
- for (int i = 0; i < a.length; i++) {
- for (int j = 0; j < a[i].length; j++) {
- a[i][j] = cols *i + j;
- }
-
- }
- return a;
- }
-
- public static List<String> shuffleAndMultiplyStrings(List<String> strings,
int multiply){
- List<String> out = new ArrayList<>();
- Random random = new Random();
- for (int i = 0; i < strings.size()*multiply; i++) {
- out.add(strings.get(random.nextInt(strings.size())));
- }
- return out;
- }
-
- public static List<String> generateRandomStrings(int numStrings, int
stringLength) {
- List<String> randomStrings = new ArrayList<>();
- Random random = new Random();
- String characters =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
- for (int i = 0; i < numStrings; i++) {
- randomStrings.add(generateRandomString(random, stringLength,
characters));
- }
- return randomStrings;
- }
-
- public static String generateRandomString(Random random, int stringLength,
String characters){
- StringBuilder randomString = new StringBuilder();
- for (int j = 0; j < stringLength; j++) {
- int randomIndex = random.nextInt(characters.length());
- randomString.append(characters.charAt(randomIndex));
- }
- return randomString.toString();
- }
-
- public static void writeStringsToCsvFile(List<String> strings, String
fileName) {
- try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName)))
{
- for (String line : strings) {
- bw.write(line);
- bw.newLine();
- }
- } catch (IOException e) {
- e.printStackTrace();
- }
- }
-
- public static Map<String,Integer> writeDictToCsvFile(List<String> strings,
String fileName) {
- try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName)))
{
- Map<String,Integer> map = new HashMap<>();
- for (int i = 0; i < strings.size(); i++) {
- map.put(strings.get(i), i);
- bw.write(strings.get(i) + Lop.DATATYPE_PREFIX + (i+1) + "\n");
- }
- return map;
- } catch (IOException e) {
- e.printStackTrace();
- return null;
- }
- }
+ private final static String TEST_NAME1 =
"TransformFrameEncodeWordEmbeddings2";
+ private final static String TEST_NAME2a =
"TransformFrameEncodeWordEmbeddings2MultiCols1";
+ private final static String TEST_NAME2b =
"TransformFrameEncodeWordEmbeddings2MultiCols2";
+
+ private final static String TEST_DIR = "functions/transform/";
+ private final static String TEST_CLASS_DIR = TEST_DIR +
TransformFrameEncodeWordEmbedding1Test.class.getSimpleName() + "/";
+
+ @Override
+ public void setUp() {
+ TestUtils.clearAssertionInformation();
+ addTestConfiguration(TEST_NAME1, new
TestConfiguration(TEST_CLASS_DIR, TEST_NAME1));
+ addTestConfiguration(TEST_NAME2a, new
TestConfiguration(TEST_CLASS_DIR, TEST_NAME2a));
+ addTestConfiguration(TEST_NAME2b, new
TestConfiguration(TEST_CLASS_DIR, TEST_NAME2b));
+ }
+
+ @Test
+ public void testTransformToWordEmbeddings() {
+ runTransformTest(TEST_NAME1, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ @Ignore
+ public void testNonRandomTransformToWordEmbeddings2Cols() {
+ runTransformTest(TEST_NAME2a, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ @Ignore
+ public void testRandomTransformToWordEmbeddings4Cols() {
+ runTransformTestMultiCols(TEST_NAME2b, ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ @Ignore
+ public void runBenchmark(){
+ runBenchmark(TEST_NAME1, ExecMode.SINGLE_NODE);
+ }
+
+
+
+
+ private void runBenchmark(String testname, ExecMode rt)
+ {
+ //set runtime platform
+ ExecMode rtold = setExecMode(rt);
+ try
+ {
+ int rows = 100;
+ int cols = 300;
+ getAndLoadTestConfiguration(testname);
+ fullDMLScriptName = getScript();
+
+ // Generate random embeddings for the distinct tokens
+ @SuppressWarnings("unused") //FIXME result comparison
+ double[][] a = createRandomMatrix("embeddings", rows,
cols, 0, 10, 1, new Date().getTime());
+
+ // Generate random distinct tokens
+ List<String> strings = generateRandomStrings(rows, 10);
+
+ // Generate the dictionary by assigning unique ID to
each distinct token
+ @SuppressWarnings("unused")
+ Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
+
+ // Create the dataset by repeating and shuffling the
distinct tokens
+ List<String> stringsColumn =
shuffleAndMultiplyStrings(strings, 320);
+ writeStringsToCsvFile(stringsColumn, baseDirectory +
INPUT_DIR + "data");
+
+ //run script
+ programArgs = new String[]{"-stats","-args",
input("embeddings"), input("data"), input("dict"), output("result")};
+ runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+ }
+ catch(Exception ex) {
+ throw new RuntimeException(ex);
+
+ }
+ finally {
+ resetExecMode(rtold);
+ }
+ }
+
+ private void runTransformTest(String testname, ExecMode rt)
+ {
+ //set runtime platform
+ ExecMode rtold = setExecMode(rt);
+ try
+ {
+ int rows = 100;
+ int cols = 300;
+ getAndLoadTestConfiguration(testname);
+ fullDMLScriptName = getScript();
+
+ // Generate random embeddings for the distinct tokens
+ double[][] a = createRandomMatrix("embeddings", rows,
cols, 0, 10, 1, new Date().getTime());
+
+ // Generate random distinct tokens
+ List<String> strings = generateRandomStrings(rows, 10);
+
+ // Generate the dictionary by assigning unique ID to
each distinct token
+ Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
+
+ // Create the dataset by repeating and shuffling the
distinct tokens
+ List<String> stringsColumn =
shuffleAndMultiplyStrings(strings, 320);
+ writeStringsToCsvFile(stringsColumn, baseDirectory +
INPUT_DIR + "data");
+
+ //run script
+ programArgs = new String[]{"-stats","-args",
input("embeddings"), input("data"), input("dict"), output("result")};
+ runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+ // Manually derive the expected result
+ double[][] res_expected =
manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn);
+
+ // Compare results
+ HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
+ double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
+ TestUtils.compareMatrices(resultActualDouble,
res_expected, 1e-6);
+ }
+ catch(Exception ex) {
+ throw new RuntimeException(ex);
+
+ }
+ finally {
+ resetExecMode(rtold);
+ }
+ }
+
+ public static void print2DimDoubleArray(double[][] resultActualDouble) {
+ Arrays.stream(resultActualDouble).forEach(
+ e ->
System.out.println(Arrays.stream(e).mapToObj(d -> String.format("%06.1f", d))
+ .reduce("", (sub, elem) -> sub
+ " " + elem)));
+ }
+
+ private void runTransformTestMultiCols(String testname, ExecMode rt)
+ {
+ //set runtime platform
+ ExecMode rtold = setExecMode(rt);
+ try
+ {
+ int rows = 100;
+ int cols = 100;
+ getAndLoadTestConfiguration(testname);
+ fullDMLScriptName = getScript();
+
+ // Generate random embeddings for the distinct tokens
+ double[][] a = createRandomMatrix("embeddings", rows,
cols, 0, 10, 1, new Date().getTime());
+
+ // Generate random distinct tokens
+ List<String> strings = generateRandomStrings(rows, 10);
+
+ // Generate the dictionary by assigning unique ID to
each distinct token
+ Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
+
+ // Create the dataset by repeating and shuffling the
distinct tokens
+ List<String> stringsColumn =
shuffleAndMultiplyStrings(strings, 10);
+ writeStringsToCsvFile(stringsColumn, baseDirectory +
INPUT_DIR + "data");
+
+ //run script
+ programArgs = new String[]{"-stats","-args",
input("embeddings"), input("data"), input("dict"), output("result"),
output("result2")};
+ runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+ // Manually derive the expected result
+ double[][] res_expected =
manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn);
+
+ // Compare results
+ HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
+ HashMap<MatrixValue.CellIndex, Double> res_actual2 =
readDMLMatrixFromOutputDir("result2");
+ double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
+ double[][] resultActualDouble2 =
TestUtils.convertHashMapToDoubleArray(res_actual2);
+ //System.out.println("Actual Result1 [" +
resultActualDouble.length + "x" + resultActualDouble[0].length + "]:");
+ print2DimDoubleArray(resultActualDouble);
+ //System.out.println("\nActual Result2 [" +
resultActualDouble.length + "x" + resultActualDouble[0].length + "]:");
+ //print2DimDoubleArray(resultActualDouble2);
+ //System.out.println("\nExpected Result [" +
res_expected.length + "x" + res_expected[0].length + "]:");
+ //print2DimDoubleArray(res_expected);
+ TestUtils.compareMatrices(resultActualDouble,
res_expected, 1e-6);
+ TestUtils.compareMatrices(resultActualDouble,
resultActualDouble2, 1e-6);
+ }
+ catch(Exception ex) {
+ throw new RuntimeException(ex);
+
+ }
+ finally {
+ resetExecMode(rtold);
+ }
+ }
+
+ public static double[][] manuallyDeriveWordEmbeddings(int cols,
double[][] a, Map<String, Integer> map, List<String> stringsColumn) {
+ // Manually derive the expected result
+ double[][] res_expected = new
double[stringsColumn.size()][cols];
+ for (int i = 0; i < stringsColumn.size(); i++) {
+ int rowMapped = map.get(stringsColumn.get(i));
+ System.arraycopy(a[rowMapped], 0, res_expected[i], 0,
cols);
+ }
+ return res_expected;
+ }
+
+ @SuppressWarnings("unused")
+ private double[][] generateWordEmbeddings(int rows, int cols) {
+ double[][] a = new double[rows][cols];
+ for (int i = 0; i < a.length; i++) {
+ for (int j = 0; j < a[i].length; j++) {
+ a[i][j] = cols *i + j;
+ }
+
+ }
+ return a;
+ }
+
+ public static List<String> shuffleAndMultiplyStrings(List<String>
strings, int multiply){
+ List<String> out = new ArrayList<>();
+ Random random = new Random();
+ for (int i = 0; i < strings.size()*multiply; i++) {
+ out.add(strings.get(random.nextInt(strings.size())));
+ }
+ return out;
+ }
+
+ public static List<String> generateRandomStrings(int numStrings, int
stringLength) {
+ List<String> randomStrings = new ArrayList<>();
+ Random random = new Random();
+ String characters =
"ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789";
+ for (int i = 0; i < numStrings; i++) {
+ randomStrings.add(generateRandomString(random,
stringLength, characters));
+ }
+ return randomStrings;
+ }
+
+ public static String generateRandomString(Random random, int
stringLength, String characters){
+ StringBuilder randomString = new StringBuilder();
+ for (int j = 0; j < stringLength; j++) {
+ int randomIndex = random.nextInt(characters.length());
+ randomString.append(characters.charAt(randomIndex));
+ }
+ return randomString.toString();
+ }
+
+ public static void writeStringsToCsvFile(List<String> strings, String
fileName) {
+ try (BufferedWriter bw = new BufferedWriter(new
FileWriter(fileName))) {
+ for (String line : strings) {
+ bw.write(line);
+ bw.newLine();
+ }
+ } catch (IOException e) {
+ e.printStackTrace();
+ }
+ }
+
+ public static Map<String,Integer> writeDictToCsvFile(List<String>
strings, String fileName) {
+ try (BufferedWriter bw = new BufferedWriter(new
FileWriter(fileName))) {
+ Map<String,Integer> map = new HashMap<>();
+ for (int i = 0; i < strings.size(); i++) {
+ map.put(strings.get(i), i);
+ bw.write(strings.get(i) + Lop.DATATYPE_PREFIX +
(i+1) + "\n");
+ }
+ return map;
+ } catch (IOException e) {
+ e.printStackTrace();
+ return null;
+ }
+ }
}
diff --git
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbeddingMMTest.java
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbeddingMMTest.java
index 3862294ca6..1359497a93 100644
---
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbeddingMMTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbeddingMMTest.java
@@ -35,74 +35,74 @@ import java.util.Map;
import static
org.apache.sysds.test.functions.transform.TransformFrameEncodeWordEmbedding2Test.*;
public class TransformFrameEncodeWordEmbeddingMMTest extends AutomatedTestBase
{
- private final static String TEST_NAME1 =
"TransformFrameEncodeWordEmbeddingsMM";
- private final static String TEST_DIR = "functions/transform/";
-
- @Override
- public void setUp() {
- TestUtils.clearAssertionInformation();
- addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_DIR,
TEST_NAME1));
- }
-
- @Test
- public void testMultiplication() {
- runMatrixMultiplicationTest(TEST_NAME1, Types.ExecMode.SINGLE_NODE);
- }
-
- private void runMatrixMultiplicationTest(String testname, Types.ExecMode
rt)
- {
- //set runtime platform
- Types.ExecMode rtold = setExecMode(rt);
- try
- {
- int rows = 100;
- int cols = 300;
- getAndLoadTestConfiguration(testname);
- fullDMLScriptName = getScript();
-
- // Generate random embeddings for the distinct tokens
- double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10,
1, new Date().getTime());
- double[][] b = createRandomMatrix("factor", cols, cols, 0, 10, 1,
new Date().getTime());
-
- // Generate random distinct tokens
- List<String> strings = generateRandomStrings(rows, 10);
-
- // Generate the dictionary by assigning unique ID to each distinct
token
- Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
-
- // Create the dataset by repeating and shuffling the distinct
tokens
- int factor = 320;
- rows *= factor;
- List<String> stringsColumn = shuffleAndMultiplyStrings(strings,
factor);
- writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR +
"data");
-
- //run script
- programArgs = new String[]{"-stats","-args", input("embeddings"),
input("data"), input("dict"), input("factor"), output("result")};
- runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
-
- // Manually derive the expected result
- double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a,
map, stringsColumn);
- double[][] res_expectedMM = new double[rows][cols];
- for (int i = 0; i < res_expectedMM.length; i++) {
- for (int j = 0; j < res_expectedMM[i].length; j++) {
- res_expectedMM[i][j] = 0.0;
- for (int k = 0; k < res_expected[i].length; k++) {
- res_expectedMM[i][j] += res_expected[i][k]*b[k][j];
- }
- }
- }
- // Compare results
- HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
- double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
- //print2DimDoubleArray(resultActualDouble);
- TestUtils.compareMatrices(res_expectedMM, resultActualDouble,
1e-8);
- }
- catch(Exception ex) {
- throw new RuntimeException(ex);
-
- }
- finally {
- resetExecMode(rtold);
- }
- }
+ private final static String TEST_NAME1 =
"TransformFrameEncodeWordEmbeddingsMM";
+ private final static String TEST_DIR = "functions/transform/";
+
+ @Override
+ public void setUp() {
+ TestUtils.clearAssertionInformation();
+ addTestConfiguration(TEST_NAME1, new
TestConfiguration(TEST_DIR, TEST_NAME1));
+ }
+
+ @Test
+ public void testMultiplication() {
+ runMatrixMultiplicationTest(TEST_NAME1,
Types.ExecMode.SINGLE_NODE);
+ }
+
+ private void runMatrixMultiplicationTest(String testname,
Types.ExecMode rt)
+ {
+ //set runtime platform
+ Types.ExecMode rtold = setExecMode(rt);
+ try
+ {
+ int rows = 100;
+ int cols = 300;
+ getAndLoadTestConfiguration(testname);
+ fullDMLScriptName = getScript();
+
+ // Generate random embeddings for the distinct tokens
+ double[][] a = createRandomMatrix("embeddings", rows,
cols, 0, 10, 1, new Date().getTime());
+ double[][] b = createRandomMatrix("factor", cols, cols,
0, 10, 1, new Date().getTime());
+
+ // Generate random distinct tokens
+ List<String> strings = generateRandomStrings(rows, 10);
+
+ // Generate the dictionary by assigning unique ID to
each distinct token
+ Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
+
+ // Create the dataset by repeating and shuffling the
distinct tokens
+ int factor = 320;
+ rows *= factor;
+ List<String> stringsColumn =
shuffleAndMultiplyStrings(strings, factor);
+ writeStringsToCsvFile(stringsColumn, baseDirectory +
INPUT_DIR + "data");
+
+ //run script
+ programArgs = new String[]{"-stats","-args",
input("embeddings"), input("data"), input("dict"), input("factor"),
output("result")};
+ runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+ // Manually derive the expected result
+ double[][] res_expected =
manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn);
+ double[][] res_expectedMM = new double[rows][cols];
+ for (int i = 0; i < res_expectedMM.length; i++) {
+ for (int j = 0; j < res_expectedMM[i].length;
j++) {
+ res_expectedMM[i][j] = 0.0;
+ for (int k = 0; k <
res_expected[i].length; k++) {
+ res_expectedMM[i][j] +=
res_expected[i][k]*b[k][j];
+ }
+ }
+ }
+ // Compare results
+ HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
+ double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
+ //print2DimDoubleArray(resultActualDouble);
+ TestUtils.compareMatrices(res_expectedMM,
resultActualDouble, 1e-8);
+ }
+ catch(Exception ex) {
+ throw new RuntimeException(ex);
+
+ }
+ finally {
+ resetExecMode(rtold);
+ }
+ }
}
diff --git
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbeddingRowSumTest.java
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbeddingRowSumTest.java
index b3a09f3ec8..31916821f6 100644
---
a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbeddingRowSumTest.java
+++
b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbeddingRowSumTest.java
@@ -37,195 +37,195 @@ import static
org.apache.sysds.runtime.functionobjects.KahanPlus.getKahanPlusFnO
import static
org.apache.sysds.test.functions.transform.TransformFrameEncodeWordEmbedding2Test.*;
public class TransformFrameEncodeWordEmbeddingRowSumTest extends
AutomatedTestBase {
- private final static String TEST_NAME1 =
"TransformFrameEncodeWordEmbeddingsRowSum";
- private final static String TEST_NAME2 =
"TransformFrameEncodeWordEmbeddingsColSum";
- private final static String TEST_NAME3 =
"TransformFrameEncodeWordEmbeddingsFullSum";
- private final static String TEST_DIR = "functions/transform/";
-
- @Override
- public void setUp() {
- TestUtils.clearAssertionInformation();
- addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_DIR,
TEST_NAME1));
- addTestConfiguration(TEST_NAME2, new TestConfiguration(TEST_DIR,
TEST_NAME2));
- addTestConfiguration(TEST_NAME3, new TestConfiguration(TEST_DIR,
TEST_NAME3));
- }
-
- @Test
- public void testDedupRowSums() {
- runDedupRowSumTest(TEST_NAME1, Types.ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testDedupColSums() {
- runDedupColSumTest(TEST_NAME2, Types.ExecMode.SINGLE_NODE);
- }
-
- @Test
- public void testDedupFullSums() {
- runDedupFullSumTest(TEST_NAME3, Types.ExecMode.SINGLE_NODE);
- }
-
- private void runDedupFullSumTest(String testname, Types.ExecMode rt)
- {
- //set runtime platform
- Types.ExecMode rtold = setExecMode(rt);
- try
- {
- int rows = 100;
- int cols = 300;
- getAndLoadTestConfiguration(testname);
- fullDMLScriptName = getScript();
-
- // Generate random embeddings for the distinct tokens
- double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10,
1, new Date().getTime());
-
- // Generate random distinct tokens
- List<String> strings = generateRandomStrings(rows, 10);
-
- // Generate the dictionary by assigning unique ID to each distinct
token
- Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
-
- // Create the dataset by repeating and shuffling the distinct
tokens
- List<String> stringsColumn = shuffleAndMultiplyStrings(strings,
320*6);
- writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR +
"data");
-
- //run script
- programArgs = new String[]{"-stats","-args", input("embeddings"),
input("data"), input("dict"), output("result")};
- runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
-
- // Manually derive the expected result
- double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a,
map, stringsColumn);
- double[][] sums_expected = new double[1][1];
- KahanObject ko = new KahanObject(0,0);
- KahanPlus kp = getKahanPlusFnObject();
- for (int i = 0; i < res_expected.length; i++) {
- for (int j = 0; j < res_expected[i].length; j++) {
- kp.execute2(ko, res_expected[i][j]);
- }
- }
- sums_expected[0][0] = ko._sum;
- // Compare results
- HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
- double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
- //print2DimDoubleArray(resultActualDouble);
- TestUtils.compareMatrices(sums_expected, resultActualDouble,
1e-14);
- }
- catch(Exception ex) {
- throw new RuntimeException(ex);
-
- }
- finally {
- resetExecMode(rtold);
- }
- }
-
- private void runDedupColSumTest(String testname, Types.ExecMode rt)
- {
- //set runtime platform
- Types.ExecMode rtold = setExecMode(rt);
- try
- {
- int rows = 100;
- int cols = 300;
- getAndLoadTestConfiguration(testname);
- fullDMLScriptName = getScript();
-
- // Generate random embeddings for the distinct tokens
- double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10,
1, new Date().getTime());
-
- // Generate random distinct tokens
- List<String> strings = generateRandomStrings(rows, 10);
-
- // Generate the dictionary by assigning unique ID to each distinct
token
- Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
-
- // Create the dataset by repeating and shuffling the distinct
tokens
- List<String> stringsColumn = shuffleAndMultiplyStrings(strings,
320*6);
- writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR +
"data");
-
- //run script
- programArgs = new String[]{"-stats","-args", input("embeddings"),
input("data"), input("dict"), output("result")};
- runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
-
- // Manually derive the expected result
- double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a,
map, stringsColumn);
- double[][] sums_expected = new double[1][res_expected[0].length];
- KahanObject ko = new KahanObject(0,0);
- KahanPlus kp = getKahanPlusFnObject();
- for (int i = 0; i < res_expected[0].length; i++) {
- ko.set(0,0);
- for (int j = 0; j < res_expected.length; j++) {
- kp.execute2(ko, res_expected[j][i]);
- }
- sums_expected[0][i] = ko._sum;
- }
- // Compare results
- HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
- double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
- //print2DimDoubleArray(resultActualDouble);
- TestUtils.compareMatrices(sums_expected, resultActualDouble, 1e-9);
- }
- catch(Exception ex) {
- throw new RuntimeException(ex);
-
- }
- finally {
- resetExecMode(rtold);
- }
- }
-
- private void runDedupRowSumTest(String testname, Types.ExecMode rt)
- {
- //set runtime platform
- Types.ExecMode rtold = setExecMode(rt);
- try
- {
- int rows = 100;
- int cols = 300;
- getAndLoadTestConfiguration(testname);
- fullDMLScriptName = getScript();
-
- // Generate random embeddings for the distinct tokens
- double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10,
1, new Date().getTime());
-
- // Generate random distinct tokens
- List<String> strings = generateRandomStrings(rows, 10);
-
- // Generate the dictionary by assigning unique ID to each distinct
token
- Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
-
- // Create the dataset by repeating and shuffling the distinct
tokens
- List<String> stringsColumn = shuffleAndMultiplyStrings(strings,
320);
- writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR +
"data");
-
- //run script
- programArgs = new String[]{"-stats","-args", input("embeddings"),
input("data"), input("dict"), output("result")};
- runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
-
- // Manually derive the expected result
- double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a,
map, stringsColumn);
- double[][] sums_expected = new double[res_expected.length][1];
- KahanObject ko = new KahanObject(0,0);
- KahanPlus kp = getKahanPlusFnObject();
- for (int i = 0; i < res_expected.length; i++) {
- ko.set(0,0);
- for (int j = 0; j < res_expected[i].length; j++) {
- kp.execute2(ko, res_expected[i][j]);
- }
- sums_expected[i][0] = ko._sum;
- }
- // Compare results
- HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
- double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
- //print2DimDoubleArray(resultActualDouble);
- TestUtils.compareMatrices(sums_expected, resultActualDouble,
1e-15);
- }
- catch(Exception ex) {
- throw new RuntimeException(ex);
-
- }
- finally {
- resetExecMode(rtold);
- }
- }
+ private final static String TEST_NAME1 =
"TransformFrameEncodeWordEmbeddingsRowSum";
+ private final static String TEST_NAME2 =
"TransformFrameEncodeWordEmbeddingsColSum";
+ private final static String TEST_NAME3 =
"TransformFrameEncodeWordEmbeddingsFullSum";
+ private final static String TEST_DIR = "functions/transform/";
+
+ @Override
+ public void setUp() {
+ TestUtils.clearAssertionInformation();
+ addTestConfiguration(TEST_NAME1, new
TestConfiguration(TEST_DIR, TEST_NAME1));
+ addTestConfiguration(TEST_NAME2, new
TestConfiguration(TEST_DIR, TEST_NAME2));
+ addTestConfiguration(TEST_NAME3, new
TestConfiguration(TEST_DIR, TEST_NAME3));
+ }
+
+ @Test
+ public void testDedupRowSums() {
+ runDedupRowSumTest(TEST_NAME1, Types.ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testDedupColSums() {
+ runDedupColSumTest(TEST_NAME2, Types.ExecMode.SINGLE_NODE);
+ }
+
+ @Test
+ public void testDedupFullSums() {
+ runDedupFullSumTest(TEST_NAME3, Types.ExecMode.SINGLE_NODE);
+ }
+
+ private void runDedupFullSumTest(String testname, Types.ExecMode rt)
+ {
+ //set runtime platform
+ Types.ExecMode rtold = setExecMode(rt);
+ try
+ {
+ int rows = 100;
+ int cols = 300;
+ getAndLoadTestConfiguration(testname);
+ fullDMLScriptName = getScript();
+
+ // Generate random embeddings for the distinct tokens
+ double[][] a = createRandomMatrix("embeddings", rows,
cols, 0, 10, 1, new Date().getTime());
+
+ // Generate random distinct tokens
+ List<String> strings = generateRandomStrings(rows, 10);
+
+ // Generate the dictionary by assigning unique ID to
each distinct token
+ Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
+
+ // Create the dataset by repeating and shuffling the
distinct tokens
+ List<String> stringsColumn =
shuffleAndMultiplyStrings(strings, 320*6);
+ writeStringsToCsvFile(stringsColumn, baseDirectory +
INPUT_DIR + "data");
+
+ //run script
+ programArgs = new String[]{"-stats","-args",
input("embeddings"), input("data"), input("dict"), output("result")};
+ runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+ // Manually derive the expected result
+ double[][] res_expected =
manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn);
+ double[][] sums_expected = new double[1][1];
+ KahanObject ko = new KahanObject(0,0);
+ KahanPlus kp = getKahanPlusFnObject();
+ for (int i = 0; i < res_expected.length; i++) {
+ for (int j = 0; j < res_expected[i].length;
j++) {
+ kp.execute2(ko, res_expected[i][j]);
+ }
+ }
+ sums_expected[0][0] = ko._sum;
+ // Compare results
+ HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
+ double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
+ //print2DimDoubleArray(resultActualDouble);
+ TestUtils.compareMatrices(sums_expected,
resultActualDouble, 1e-14);
+ }
+ catch(Exception ex) {
+ throw new RuntimeException(ex);
+
+ }
+ finally {
+ resetExecMode(rtold);
+ }
+ }
+
+ private void runDedupColSumTest(String testname, Types.ExecMode rt)
+ {
+ //set runtime platform
+ Types.ExecMode rtold = setExecMode(rt);
+ try
+ {
+ int rows = 100;
+ int cols = 300;
+ getAndLoadTestConfiguration(testname);
+ fullDMLScriptName = getScript();
+
+ // Generate random embeddings for the distinct tokens
+ double[][] a = createRandomMatrix("embeddings", rows,
cols, 0, 10, 1, new Date().getTime());
+
+ // Generate random distinct tokens
+ List<String> strings = generateRandomStrings(rows, 10);
+
+ // Generate the dictionary by assigning unique ID to
each distinct token
+ Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
+
+ // Create the dataset by repeating and shuffling the
distinct tokens
+ List<String> stringsColumn =
shuffleAndMultiplyStrings(strings, 320*6);
+ writeStringsToCsvFile(stringsColumn, baseDirectory +
INPUT_DIR + "data");
+
+ //run script
+ programArgs = new String[]{"-stats","-args",
input("embeddings"), input("data"), input("dict"), output("result")};
+ runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+ // Manually derive the expected result
+ double[][] res_expected =
manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn);
+ double[][] sums_expected = new
double[1][res_expected[0].length];
+ KahanObject ko = new KahanObject(0,0);
+ KahanPlus kp = getKahanPlusFnObject();
+ for (int i = 0; i < res_expected[0].length; i++) {
+ ko.set(0,0);
+ for (int j = 0; j < res_expected.length; j++) {
+ kp.execute2(ko, res_expected[j][i]);
+ }
+ sums_expected[0][i] = ko._sum;
+ }
+ // Compare results
+ HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
+ double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
+ //print2DimDoubleArray(resultActualDouble);
+ TestUtils.compareMatrices(sums_expected,
resultActualDouble, 1e-9);
+ }
+ catch(Exception ex) {
+ throw new RuntimeException(ex);
+
+ }
+ finally {
+ resetExecMode(rtold);
+ }
+ }
+
+ private void runDedupRowSumTest(String testname, Types.ExecMode rt)
+ {
+ //set runtime platform
+ Types.ExecMode rtold = setExecMode(rt);
+ try
+ {
+ int rows = 100;
+ int cols = 300;
+ getAndLoadTestConfiguration(testname);
+ fullDMLScriptName = getScript();
+
+ // Generate random embeddings for the distinct tokens
+ double[][] a = createRandomMatrix("embeddings", rows,
cols, 0, 10, 1, new Date().getTime());
+
+ // Generate random distinct tokens
+ List<String> strings = generateRandomStrings(rows, 10);
+
+ // Generate the dictionary by assigning unique ID to
each distinct token
+ Map<String,Integer> map = writeDictToCsvFile(strings,
baseDirectory + INPUT_DIR + "dict");
+
+ // Create the dataset by repeating and shuffling the
distinct tokens
+ List<String> stringsColumn =
shuffleAndMultiplyStrings(strings, 320);
+ writeStringsToCsvFile(stringsColumn, baseDirectory +
INPUT_DIR + "data");
+
+ //run script
+ programArgs = new String[]{"-stats","-args",
input("embeddings"), input("data"), input("dict"), output("result")};
+ runTest(true, EXCEPTION_NOT_EXPECTED, null, -1);
+
+ // Manually derive the expected result
+ double[][] res_expected =
manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn);
+ double[][] sums_expected = new
double[res_expected.length][1];
+ KahanObject ko = new KahanObject(0,0);
+ KahanPlus kp = getKahanPlusFnObject();
+ for (int i = 0; i < res_expected.length; i++) {
+ ko.set(0,0);
+ for (int j = 0; j < res_expected[i].length;
j++) {
+ kp.execute2(ko, res_expected[i][j]);
+ }
+ sums_expected[i][0] = ko._sum;
+ }
+ // Compare results
+ HashMap<MatrixValue.CellIndex, Double> res_actual =
readDMLMatrixFromOutputDir("result");
+ double[][] resultActualDouble =
TestUtils.convertHashMapToDoubleArray(res_actual);
+ //print2DimDoubleArray(resultActualDouble);
+ TestUtils.compareMatrices(sums_expected,
resultActualDouble, 1e-15);
+ }
+ catch(Exception ex) {
+ throw new RuntimeException(ex);
+
+ }
+ finally {
+ resetExecMode(rtold);
+ }
+ }
}