This is an automated email from the ASF dual-hosted git repository. mboehm7 pushed a commit to branch main in repository https://gitbox.apache.org/repos/asf/systemds.git
commit 28f929fd5d31bc53615bd16ea82a9fd0852b2cbe Author: Matthias Boehm <[email protected]> AuthorDate: Thu Jun 29 20:47:54 2023 +0200 [MINOR] Fix misc warnings and formatting issues --- .../lops/rewrite/RewriteUpdateGPUPlacements.java | 1 - .../sysds/runtime/data/DenseBlockFP64DEDUP.java | 447 +++++++++--------- .../sysds/runtime/data/DenseBlockLFP64DEDUP.java | 423 ++++++++--------- .../TransformFrameEncodeWordEmbedding2Test.java | 509 +++++++++++---------- 4 files changed, 691 insertions(+), 689 deletions(-) diff --git a/src/main/java/org/apache/sysds/lops/rewrite/RewriteUpdateGPUPlacements.java b/src/main/java/org/apache/sysds/lops/rewrite/RewriteUpdateGPUPlacements.java index 6bdba608a2..8491b3693f 100644 --- a/src/main/java/org/apache/sysds/lops/rewrite/RewriteUpdateGPUPlacements.java +++ b/src/main/java/org/apache/sysds/lops/rewrite/RewriteUpdateGPUPlacements.java @@ -31,7 +31,6 @@ import org.apache.sysds.runtime.matrix.data.MatrixBlock; import java.util.ArrayList; import java.util.List; -import java.util.stream.Collectors; public class RewriteUpdateGPUPlacements extends LopRewriteRule { diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64DEDUP.java b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64DEDUP.java index bf0e83ec5b..40eb4c5018 100644 --- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64DEDUP.java +++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockFP64DEDUP.java @@ -27,227 +27,228 @@ import java.util.Arrays; import java.util.HashMap; public class DenseBlockFP64DEDUP extends DenseBlockDRB{ - private double[][] _data; - - protected DenseBlockFP64DEDUP(int[] dims) { - super(dims); - reset(_rlen, _odims, 0); - } - - @Override - protected void allocateBlock(int bix, int length) { - _data[bix] = new double[length]; - } - - @Override - public void reset(int rlen, int[] odims, double v) { - if(rlen > capacity() / _odims[0]) - _data = new double[rlen][]; - else{ - if(v == 0.0){ - for(int i = 0; i < rlen; i++) - _data[i] = null; - } - else { - for(int i = 0; i < rlen; i++){ - if(odims[0] > _odims[0] ||_data[i] == null ) - allocateBlock(i, odims[0]); - Arrays.fill(_data[i], 0, odims[0], v); - } - } - } - _rlen = rlen; - _odims = odims; - } - - @Override - public void resetNoFill(int rlen, int[] odims) { - if(_data == null || rlen > _rlen){ - _data = new double[rlen][]; - } - _rlen = rlen; - _odims = odims; - } - - @Override - public boolean isNumeric() { - return true; - } - - @Override - public boolean isNumeric(Types.ValueType vt) { - return Types.ValueType.FP64 == vt; - } - - @Override - public long capacity() { - return (_data != null) ? _data.length*_odims[0] : -1; - } - - @Override - public long countNonZeros(){ - long nnz = 0; - HashMap<double[], Long> cache = new HashMap<double[], Long>(); - for (int i = 0; i < _rlen; i++) { - double[] row = this._data[i]; - if(row == null) - continue; - Long count = cache.getOrDefault(row, null); - if(count == null){ - count = Long.valueOf(countNonZeros(i)); - cache.put(row, count); - } - nnz += count; - } - return nnz; - } - - @Override - public int countNonZeros(int r) { - return _data[r] == null ? 0 : UtilFunctions.computeNnz(_data[r], 0, _odims[0]); - } - - @Override - protected long computeNnz(int bix, int start, int length) { - int nnz = 0; - int row_start = (int) Math.floor(start / _odims[0]); - int col_start = start % _odims[0]; - for (int i = 0; i < length; i++) { - if(_data[row_start] == null){ - i += _odims[0] - 1 - col_start; - col_start = 0; - row_start += 1; - continue; - } - nnz += _data[row_start][col_start] != 0 ? 1 : 0; - col_start += 1; - if(col_start == _odims[0]) { - col_start = 0; - row_start += 1; - } - } - return nnz; - } - - @Override - public int pos(int r){ - return 0; - } - - @Override - public int pos(int[] ix){ - int pos = ix[ix.length - 1]; - for(int i = 1; i < ix.length - 1; i++) - pos += ix[i] * _odims[i]; - return pos; - } - - @Override - public double[] values(int r) { - return valuesAt(r); - } - - @Override - public double[] valuesAt(int bix) { - return _data[bix] == null ? new double[_odims[0]] : _data[bix]; - } - - @Override - public int index(int r) { - return r; - } - - @Override - public int numBlocks(){ - return _data.length; - } - - @Override - public int size(int bix) { - return _odims[0]; - } - - @Override - public void incr(int r, int c) { - incr(r,c,1.0); - } - - @Override - public void incr(int r, int c, double delta) { - if(_data[r] == null) - allocateBlock(r, _odims[0]); - _data[r][c] += delta; - } - - @Override - protected void fillBlock(int bix, int fromIndex, int toIndex, double v) { - if(_data[bix] == null) - allocateBlock(bix, _odims[0]); - Arrays.fill(_data[bix], fromIndex, toIndex, v); - } - - @Override - protected void setInternal(int bix, int ix, double v) { - set(bix, ix, v); - } - - @Override - public DenseBlock set(int r, int c, double v) { - if(_data[r] == null) - _data[r] = new double[_odims[0]]; - _data[r][c] = v; - return this; - } - - @Override - public DenseBlock set(int r, double[] v) { - if(v.length == _odims[0]) - _data[r] = v; - else - throw new RuntimeException("set Denseblock called with an array length [" + v.length +"], array to overwrite is of length [" + _odims[0] + "]"); - return this; - } - - @Override - public DenseBlock set(DenseBlock db) { - throw new NotImplementedException(); - } - - @Override - public DenseBlock set(int[] ix, double v) { - return set(ix[0], pos(ix), v); - } - - @Override - public DenseBlock set(int[] ix, long v) { - return set(ix[0], pos(ix), v); - } - - @Override - public DenseBlock set(int[] ix, String v) { - return set(ix[0], pos(ix), Double.parseDouble(v)); - } - - @Override - public double get(int r, int c) { - if(_data[r] == null) - return 0.0; - else - return _data[r][c]; - } - - @Override - public double get(int[] ix) { - return get(ix[0], pos(ix)); - } - - @Override - public String getString(int[] ix) { - return String.valueOf(get(ix[0], pos(ix))); - } - - @Override - public long getLong(int[] ix) { - return UtilFunctions.toLong(get(ix[0], pos(ix))); - } + private static final long serialVersionUID = 4124905190428752213L; + private double[][] _data; + + protected DenseBlockFP64DEDUP(int[] dims) { + super(dims); + reset(_rlen, _odims, 0); + } + + @Override + protected void allocateBlock(int bix, int length) { + _data[bix] = new double[length]; + } + + @Override + public void reset(int rlen, int[] odims, double v) { + if(rlen > capacity() / _odims[0]) + _data = new double[rlen][]; + else{ + if(v == 0.0){ + for(int i = 0; i < rlen; i++) + _data[i] = null; + } + else { + for(int i = 0; i < rlen; i++){ + if(odims[0] > _odims[0] ||_data[i] == null ) + allocateBlock(i, odims[0]); + Arrays.fill(_data[i], 0, odims[0], v); + } + } + } + _rlen = rlen; + _odims = odims; + } + + @Override + public void resetNoFill(int rlen, int[] odims) { + if(_data == null || rlen > _rlen){ + _data = new double[rlen][]; + } + _rlen = rlen; + _odims = odims; + } + + @Override + public boolean isNumeric() { + return true; + } + + @Override + public boolean isNumeric(Types.ValueType vt) { + return Types.ValueType.FP64 == vt; + } + + @Override + public long capacity() { + return (_data != null) ? _data.length*_odims[0] : -1; + } + + @Override + public long countNonZeros(){ + long nnz = 0; + HashMap<double[], Long> cache = new HashMap<double[], Long>(); + for (int i = 0; i < _rlen; i++) { + double[] row = this._data[i]; + if(row == null) + continue; + Long count = cache.getOrDefault(row, null); + if(count == null){ + count = Long.valueOf(countNonZeros(i)); + cache.put(row, count); + } + nnz += count; + } + return nnz; + } + + @Override + public int countNonZeros(int r) { + return _data[r] == null ? 0 : UtilFunctions.computeNnz(_data[r], 0, _odims[0]); + } + + @Override + protected long computeNnz(int bix, int start, int length) { + int nnz = 0; + int row_start = (int) Math.floor(start / _odims[0]); + int col_start = start % _odims[0]; + for (int i = 0; i < length; i++) { + if(_data[row_start] == null){ + i += _odims[0] - 1 - col_start; + col_start = 0; + row_start += 1; + continue; + } + nnz += _data[row_start][col_start] != 0 ? 1 : 0; + col_start += 1; + if(col_start == _odims[0]) { + col_start = 0; + row_start += 1; + } + } + return nnz; + } + + @Override + public int pos(int r){ + return 0; + } + + @Override + public int pos(int[] ix){ + int pos = ix[ix.length - 1]; + for(int i = 1; i < ix.length - 1; i++) + pos += ix[i] * _odims[i]; + return pos; + } + + @Override + public double[] values(int r) { + return valuesAt(r); + } + + @Override + public double[] valuesAt(int bix) { + return _data[bix] == null ? new double[_odims[0]] : _data[bix]; + } + + @Override + public int index(int r) { + return r; + } + + @Override + public int numBlocks(){ + return _data.length; + } + + @Override + public int size(int bix) { + return _odims[0]; + } + + @Override + public void incr(int r, int c) { + incr(r,c,1.0); + } + + @Override + public void incr(int r, int c, double delta) { + if(_data[r] == null) + allocateBlock(r, _odims[0]); + _data[r][c] += delta; + } + + @Override + protected void fillBlock(int bix, int fromIndex, int toIndex, double v) { + if(_data[bix] == null) + allocateBlock(bix, _odims[0]); + Arrays.fill(_data[bix], fromIndex, toIndex, v); + } + + @Override + protected void setInternal(int bix, int ix, double v) { + set(bix, ix, v); + } + + @Override + public DenseBlock set(int r, int c, double v) { + if(_data[r] == null) + _data[r] = new double[_odims[0]]; + _data[r][c] = v; + return this; + } + + @Override + public DenseBlock set(int r, double[] v) { + if(v.length == _odims[0]) + _data[r] = v; + else + throw new RuntimeException("set Denseblock called with an array length [" + v.length +"], array to overwrite is of length [" + _odims[0] + "]"); + return this; + } + + @Override + public DenseBlock set(DenseBlock db) { + throw new NotImplementedException(); + } + + @Override + public DenseBlock set(int[] ix, double v) { + return set(ix[0], pos(ix), v); + } + + @Override + public DenseBlock set(int[] ix, long v) { + return set(ix[0], pos(ix), v); + } + + @Override + public DenseBlock set(int[] ix, String v) { + return set(ix[0], pos(ix), Double.parseDouble(v)); + } + + @Override + public double get(int r, int c) { + if(_data[r] == null) + return 0.0; + else + return _data[r][c]; + } + + @Override + public double get(int[] ix) { + return get(ix[0], pos(ix)); + } + + @Override + public String getString(int[] ix) { + return String.valueOf(get(ix[0], pos(ix))); + } + + @Override + public long getLong(int[] ix) { + return UtilFunctions.toLong(get(ix[0], pos(ix))); + } } diff --git a/src/main/java/org/apache/sysds/runtime/data/DenseBlockLFP64DEDUP.java b/src/main/java/org/apache/sysds/runtime/data/DenseBlockLFP64DEDUP.java index 5a4249c920..03b072cc4f 100644 --- a/src/main/java/org/apache/sysds/runtime/data/DenseBlockLFP64DEDUP.java +++ b/src/main/java/org/apache/sysds/runtime/data/DenseBlockLFP64DEDUP.java @@ -27,215 +27,216 @@ import java.util.Arrays; import java.util.HashMap; public class DenseBlockLFP64DEDUP extends DenseBlockLDRB{ - //WIP - private double[][] _data; - - protected DenseBlockLFP64DEDUP(int[] dims) { - super(dims); - reset(_rlen, _odims, 0); - } - - @Override - protected void allocateBlocks(int numBlocks) { - _data = new double[numBlocks][]; - } - - @Override - protected void allocateBlock(int bix, int length) { - _data[bix] = new double[length]; - } - - @Override - public void reset(int rlen, int[] odims, double v) { - if(rlen > capacity() / _odims[0]) { - this.allocateBlocks(rlen); - if (v != 0.0) { - for (int i = 0; i < rlen; i++) { - allocateBlock(i, odims[0]); - Arrays.fill(_data[i], 0, odims[0], v); - } - } - } - else{ - if(v == 0.0){ - for(int i = 0; i < rlen; i++) - _data[i] = null; - } - else { - for(int i = 0; i < rlen; i++){ - if(odims[0] > _odims[0] ||_data[i] == null ) - allocateBlock(i, odims[0]); - Arrays.fill(_data[i], 0, odims[0], v); - } - } - } - _blen = 1; - _rlen = rlen; - _odims = odims; - } - - @Override - public boolean isNumeric() { - return true; - } - - @Override - public boolean isNumeric(Types.ValueType vt) { - return Types.ValueType.FP64 == vt; - } - - @Override - public boolean isContiguous() { - return false; - } - - @Override - public long capacity() { - return (_data != null) ? _data.length*_odims[0] : -1; - } - - @Override - public long countNonZeros(){ - long nnz = 0; - HashMap<double[], Long> cache = new HashMap<double[], Long>(); - for (int i = 0; i < _rlen; i++) { - double[] row = this._data[i]; - if(row == null) - continue; - Long count = cache.getOrDefault(row, null); - if(count == null){ - count = Long.valueOf(countNonZeros(i)); - cache.put(row, count); - } - nnz += count; - } - return nnz; - } - - @Override - public int countNonZeros(int r) { - return _data[r] == null ? 0 : UtilFunctions.computeNnz(_data[r], 0, _odims[0]); - } - - @Override - protected long computeNnz(int bix, int start, int length) { - int nnz = 0; - int row_start = (int) Math.floor(start / _odims[0]); - int col_start = start % _odims[0]; - for (int i = 0; i < length; i++) { - if(_data[row_start] == null){ - i += _odims[0] - 1 - col_start; - col_start = 0; - row_start += 1; - continue; - } - nnz += _data[row_start][col_start] != 0 ? 1 : 0; - col_start += 1; - if(col_start == _odims[0]) { - col_start = 0; - row_start += 1; - } - } - return nnz; - } - - @Override - public int pos(int r){ - return 0; - } - - @Override - public double[] values(int r) { - if(_data[r] == null) - allocateBlock(r, _odims[0]); - return _data[r]; - } - - @Override - public double[] valuesAt(int bix) { - return values(bix); - } - - - @Override - public int numBlocks(){ - return _data.length; - } - - @Override - public void incr(int r, int c) { - throw new NotImplementedException(); - } - - @Override - public void incr(int r, int c, double delta) { - throw new NotImplementedException(); - } - - @Override - protected void fillBlock(int bix, int fromIndex, int toIndex, double v) { - throw new NotImplementedException(); - } - - @Override - protected void setInternal(int bix, int ix, double v) { - throw new NotImplementedException(); - } - - @Override - public DenseBlock set(int r, int c, double v) { - if(_data[r] == null) - _data[r] = new double[_odims[0]]; - _data[r][c] = v; - return this; - } - - @Override - public DenseBlock set(int r, double[] v) { - if(v.length == _odims[0]) - _data[r] = v; - else - throw new RuntimeException("set Denseblock called with an array length [" + v.length +"], array to overwrite is of length [" + _odims[0] + "]"); - return this; - } - - @Override - public DenseBlock set(DenseBlock db) { - throw new NotImplementedException(); - } - - @Override - public DenseBlock set(int[] ix, double v) { - throw new NotImplementedException(); - } - - @Override - public DenseBlock set(int[] ix, long v) { - throw new NotImplementedException(); - } - - @Override - public DenseBlock set(int[] ix, String v) { - throw new NotImplementedException(); - } - - @Override - public double get(int r, int c) { - return _data[r][c]; - } - - @Override - public double get(int[] ix) { - throw new NotImplementedException(); - } - - @Override - public String getString(int[] ix) { - throw new NotImplementedException(); - } - - @Override - public long getLong(int[] ix) { - throw new NotImplementedException(); - } + private static final long serialVersionUID = -3437790596644064171L; + //WIP + private double[][] _data; + + protected DenseBlockLFP64DEDUP(int[] dims) { + super(dims); + reset(_rlen, _odims, 0); + } + + @Override + protected void allocateBlocks(int numBlocks) { + _data = new double[numBlocks][]; + } + + @Override + protected void allocateBlock(int bix, int length) { + _data[bix] = new double[length]; + } + + @Override + public void reset(int rlen, int[] odims, double v) { + if(rlen > capacity() / _odims[0]) { + this.allocateBlocks(rlen); + if (v != 0.0) { + for (int i = 0; i < rlen; i++) { + allocateBlock(i, odims[0]); + Arrays.fill(_data[i], 0, odims[0], v); + } + } + } + else{ + if(v == 0.0){ + for(int i = 0; i < rlen; i++) + _data[i] = null; + } + else { + for(int i = 0; i < rlen; i++){ + if(odims[0] > _odims[0] ||_data[i] == null ) + allocateBlock(i, odims[0]); + Arrays.fill(_data[i], 0, odims[0], v); + } + } + } + _blen = 1; + _rlen = rlen; + _odims = odims; + } + + @Override + public boolean isNumeric() { + return true; + } + + @Override + public boolean isNumeric(Types.ValueType vt) { + return Types.ValueType.FP64 == vt; + } + + @Override + public boolean isContiguous() { + return false; + } + + @Override + public long capacity() { + return (_data != null) ? _data.length*_odims[0] : -1; + } + + @Override + public long countNonZeros(){ + long nnz = 0; + HashMap<double[], Long> cache = new HashMap<double[], Long>(); + for (int i = 0; i < _rlen; i++) { + double[] row = this._data[i]; + if(row == null) + continue; + Long count = cache.getOrDefault(row, null); + if(count == null){ + count = Long.valueOf(countNonZeros(i)); + cache.put(row, count); + } + nnz += count; + } + return nnz; + } + + @Override + public int countNonZeros(int r) { + return _data[r] == null ? 0 : UtilFunctions.computeNnz(_data[r], 0, _odims[0]); + } + + @Override + protected long computeNnz(int bix, int start, int length) { + int nnz = 0; + int row_start = (int) Math.floor(start / _odims[0]); + int col_start = start % _odims[0]; + for (int i = 0; i < length; i++) { + if(_data[row_start] == null){ + i += _odims[0] - 1 - col_start; + col_start = 0; + row_start += 1; + continue; + } + nnz += _data[row_start][col_start] != 0 ? 1 : 0; + col_start += 1; + if(col_start == _odims[0]) { + col_start = 0; + row_start += 1; + } + } + return nnz; + } + + @Override + public int pos(int r){ + return 0; + } + + @Override + public double[] values(int r) { + if(_data[r] == null) + allocateBlock(r, _odims[0]); + return _data[r]; + } + + @Override + public double[] valuesAt(int bix) { + return values(bix); + } + + + @Override + public int numBlocks(){ + return _data.length; + } + + @Override + public void incr(int r, int c) { + throw new NotImplementedException(); + } + + @Override + public void incr(int r, int c, double delta) { + throw new NotImplementedException(); + } + + @Override + protected void fillBlock(int bix, int fromIndex, int toIndex, double v) { + throw new NotImplementedException(); + } + + @Override + protected void setInternal(int bix, int ix, double v) { + throw new NotImplementedException(); + } + + @Override + public DenseBlock set(int r, int c, double v) { + if(_data[r] == null) + _data[r] = new double[_odims[0]]; + _data[r][c] = v; + return this; + } + + @Override + public DenseBlock set(int r, double[] v) { + if(v.length == _odims[0]) + _data[r] = v; + else + throw new RuntimeException("set Denseblock called with an array length [" + v.length +"], array to overwrite is of length [" + _odims[0] + "]"); + return this; + } + + @Override + public DenseBlock set(DenseBlock db) { + throw new NotImplementedException(); + } + + @Override + public DenseBlock set(int[] ix, double v) { + throw new NotImplementedException(); + } + + @Override + public DenseBlock set(int[] ix, long v) { + throw new NotImplementedException(); + } + + @Override + public DenseBlock set(int[] ix, String v) { + throw new NotImplementedException(); + } + + @Override + public double get(int r, int c) { + return _data[r][c]; + } + + @Override + public double get(int[] ix) { + throw new NotImplementedException(); + } + + @Override + public String getString(int[] ix) { + throw new NotImplementedException(); + } + + @Override + public long getLong(int[] ix) { + throw new NotImplementedException(); + } } diff --git a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java index 06c8b6ee0b..4787d35bcf 100644 --- a/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java +++ b/src/test/java/org/apache/sysds/test/functions/transform/TransformFrameEncodeWordEmbedding2Test.java @@ -41,258 +41,259 @@ import java.util.Random; public class TransformFrameEncodeWordEmbedding2Test extends AutomatedTestBase { - private final static String TEST_NAME1 = "TransformFrameEncodeWordEmbeddings2"; - private final static String TEST_NAME2a = "TransformFrameEncodeWordEmbeddings2MultiCols1"; - private final static String TEST_NAME2b = "TransformFrameEncodeWordEmbeddings2MultiCols2"; - - private final static String TEST_DIR = "functions/transform/"; - private final static String TEST_CLASS_DIR = TEST_DIR + TransformFrameEncodeWordEmbedding1Test.class.getSimpleName() + "/"; - - @Override - public void setUp() { - TestUtils.clearAssertionInformation(); - addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_DIR, TEST_NAME1)); - addTestConfiguration(TEST_NAME2a, new TestConfiguration(TEST_DIR, TEST_NAME2a)); - addTestConfiguration(TEST_NAME2b, new TestConfiguration(TEST_DIR, TEST_NAME2b)); - } - - @Test - public void testTransformToWordEmbeddings() { - runTransformTest(TEST_NAME1, ExecMode.SINGLE_NODE); - } - - @Test - @Ignore - public void testNonRandomTransformToWordEmbeddings2Cols() { - runTransformTest(TEST_NAME2a, ExecMode.SINGLE_NODE); - } - - @Test - @Ignore - public void testRandomTransformToWordEmbeddings4Cols() { - runTransformTestMultiCols(TEST_NAME2b, ExecMode.SINGLE_NODE); - } - - @Test - @Ignore - public void runBenchmark(){ - runBenchmark(TEST_NAME1, ExecMode.SINGLE_NODE); - } - - - private void runBenchmark(String testname, ExecMode rt) - { - //set runtime platform - ExecMode rtold = setExecMode(rt); - try - { - int rows = 100; - int cols = 300; - getAndLoadTestConfiguration(testname); - fullDMLScriptName = getScript(); - - // Generate random embeddings for the distinct tokens - double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10, 1, new Date().getTime()); - - // Generate random distinct tokens - List<String> strings = generateRandomStrings(rows, 10); - - // Generate the dictionary by assigning unique ID to each distinct token - Map<String,Integer> map = writeDictToCsvFile(strings, baseDirectory + INPUT_DIR + "dict"); - - // Create the dataset by repeating and shuffling the distinct tokens - List<String> stringsColumn = shuffleAndMultiplyStrings(strings, 320); - writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR + "data"); - - //run script - programArgs = new String[]{"-stats","-args", input("embeddings"), input("data"), input("dict"), output("result")}; - runTest(true, EXCEPTION_NOT_EXPECTED, null, -1); - } - catch(Exception ex) { - throw new RuntimeException(ex); - - } - finally { - resetExecMode(rtold); - } - } - - private void runTransformTest(String testname, ExecMode rt) - { - //set runtime platform - ExecMode rtold = setExecMode(rt); - try - { - int rows = 100; - int cols = 300; - getAndLoadTestConfiguration(testname); - fullDMLScriptName = getScript(); - - // Generate random embeddings for the distinct tokens - double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10, 1, new Date().getTime()); - - // Generate random distinct tokens - List<String> strings = generateRandomStrings(rows, 10); - - // Generate the dictionary by assigning unique ID to each distinct token - Map<String,Integer> map = writeDictToCsvFile(strings, baseDirectory + INPUT_DIR + "dict"); - - // Create the dataset by repeating and shuffling the distinct tokens - List<String> stringsColumn = shuffleAndMultiplyStrings(strings, 32); - writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR + "data"); - - //run script - programArgs = new String[]{"-stats","-args", input("embeddings"), input("data"), input("dict"), output("result")}; - runTest(true, EXCEPTION_NOT_EXPECTED, null, -1); - - // Manually derive the expected result - double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn); - - // Compare results - HashMap<MatrixValue.CellIndex, Double> res_actual = readDMLMatrixFromOutputDir("result"); - double[][] resultActualDouble = TestUtils.convertHashMapToDoubleArray(res_actual); - TestUtils.compareMatrices(resultActualDouble, res_expected, 1e-6); - } - catch(Exception ex) { - throw new RuntimeException(ex); - - } - finally { - resetExecMode(rtold); - } - } - - private void print2DimDoubleArray(double[][] resultActualDouble) { - Arrays.stream(resultActualDouble).forEach( - e -> System.out.println(Arrays.stream(e).mapToObj(d -> String.format("%06.1f", d)) - .reduce("", (sub, elem) -> sub + " " + elem))); - } - - private void runTransformTestMultiCols(String testname, ExecMode rt) - { - //set runtime platform - ExecMode rtold = setExecMode(rt); - try - { - int rows = 100; - int cols = 100; - getAndLoadTestConfiguration(testname); - fullDMLScriptName = getScript(); - - // Generate random embeddings for the distinct tokens - double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10, 1, new Date().getTime()); - - // Generate random distinct tokens - List<String> strings = generateRandomStrings(rows, 10); - - // Generate the dictionary by assigning unique ID to each distinct token - Map<String,Integer> map = writeDictToCsvFile(strings, baseDirectory + INPUT_DIR + "dict"); - - // Create the dataset by repeating and shuffling the distinct tokens - List<String> stringsColumn = shuffleAndMultiplyStrings(strings, 10); - writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR + "data"); - - //run script - programArgs = new String[]{"-stats","-args", input("embeddings"), input("data"), input("dict"), output("result"), output("result2")}; - runTest(true, EXCEPTION_NOT_EXPECTED, null, -1); - - // Manually derive the expected result - double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn); - - // Compare results - HashMap<MatrixValue.CellIndex, Double> res_actual = readDMLMatrixFromOutputDir("result"); - HashMap<MatrixValue.CellIndex, Double> res_actual2 = readDMLMatrixFromOutputDir("result2"); - double[][] resultActualDouble = TestUtils.convertHashMapToDoubleArray(res_actual); - double[][] resultActualDouble2 = TestUtils.convertHashMapToDoubleArray(res_actual2); - //System.out.println("Actual Result1 [" + resultActualDouble.length + "x" + resultActualDouble[0].length + "]:"); - ///print2DimDoubleArray(resultActualDouble); - //System.out.println("\nActual Result2 [" + resultActualDouble.length + "x" + resultActualDouble[0].length + "]:"); - //print2DimDoubleArray(resultActualDouble2); - //System.out.println("\nExpected Result [" + res_expected.length + "x" + res_expected[0].length + "]:"); - //print2DimDoubleArray(res_expected); - TestUtils.compareMatrices(resultActualDouble, res_expected, 1e-6); - TestUtils.compareMatrices(resultActualDouble, resultActualDouble2, 1e-6); - } - catch(Exception ex) { - throw new RuntimeException(ex); - - } - finally { - resetExecMode(rtold); - } - } - - private double[][] manuallyDeriveWordEmbeddings(int cols, double[][] a, Map<String, Integer> map, List<String> stringsColumn) { - // Manually derive the expected result - double[][] res_expected = new double[stringsColumn.size()][cols]; - for (int i = 0; i < stringsColumn.size(); i++) { - int rowMapped = map.get(stringsColumn.get(i)); - System.arraycopy(a[rowMapped], 0, res_expected[i], 0, cols); - } - return res_expected; - } - - private double[][] generateWordEmbeddings(int rows, int cols) { - double[][] a = new double[rows][cols]; - for (int i = 0; i < a.length; i++) { - for (int j = 0; j < a[i].length; j++) { - a[i][j] = cols *i + j; - } - - } - return a; - } - - public static List<String> shuffleAndMultiplyStrings(List<String> strings, int multiply){ - List<String> out = new ArrayList<>(); - Random random = new Random(); - for (int i = 0; i < strings.size()*multiply; i++) { - out.add(strings.get(random.nextInt(strings.size()))); - } - return out; - } - - public static List<String> generateRandomStrings(int numStrings, int stringLength) { - List<String> randomStrings = new ArrayList<>(); - Random random = new Random(); - String characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; - for (int i = 0; i < numStrings; i++) { - randomStrings.add(generateRandomString(random, stringLength, characters)); - } - return randomStrings; - } - - public static String generateRandomString(Random random, int stringLength, String characters){ - StringBuilder randomString = new StringBuilder(); - for (int j = 0; j < stringLength; j++) { - int randomIndex = random.nextInt(characters.length()); - randomString.append(characters.charAt(randomIndex)); - } - return randomString.toString(); - } - - public static void writeStringsToCsvFile(List<String> strings, String fileName) { - try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName))) { - for (String line : strings) { - bw.write(line); - bw.newLine(); - } - } catch (IOException e) { - e.printStackTrace(); - } - } - - public static Map<String,Integer> writeDictToCsvFile(List<String> strings, String fileName) { - try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName))) { - Map<String,Integer> map = new HashMap<>(); - for (int i = 0; i < strings.size(); i++) { - map.put(strings.get(i), i); - bw.write(strings.get(i) + Lop.DATATYPE_PREFIX + (i+1) + "\n"); - } - return map; - } catch (IOException e) { - e.printStackTrace(); - return null; - } - } + private final static String TEST_NAME1 = "TransformFrameEncodeWordEmbeddings2"; + private final static String TEST_NAME2a = "TransformFrameEncodeWordEmbeddings2MultiCols1"; + private final static String TEST_NAME2b = "TransformFrameEncodeWordEmbeddings2MultiCols2"; + + private final static String TEST_DIR = "functions/transform/"; + + @Override + public void setUp() { + TestUtils.clearAssertionInformation(); + addTestConfiguration(TEST_NAME1, new TestConfiguration(TEST_DIR, TEST_NAME1)); + addTestConfiguration(TEST_NAME2a, new TestConfiguration(TEST_DIR, TEST_NAME2a)); + addTestConfiguration(TEST_NAME2b, new TestConfiguration(TEST_DIR, TEST_NAME2b)); + } + + @Test + public void testTransformToWordEmbeddings() { + runTransformTest(TEST_NAME1, ExecMode.SINGLE_NODE); + } + + @Test + @Ignore + public void testNonRandomTransformToWordEmbeddings2Cols() { + runTransformTest(TEST_NAME2a, ExecMode.SINGLE_NODE); + } + + @Test + @Ignore + public void testRandomTransformToWordEmbeddings4Cols() { + runTransformTestMultiCols(TEST_NAME2b, ExecMode.SINGLE_NODE); + } + + @Test + @Ignore + public void runBenchmark(){ + runBenchmark(TEST_NAME1, ExecMode.SINGLE_NODE); + } + + + private void runBenchmark(String testname, ExecMode rt) + { + //set runtime platform + ExecMode rtold = setExecMode(rt); + try + { + int rows = 100; + //int cols = 300; + getAndLoadTestConfiguration(testname); + fullDMLScriptName = getScript(); + + // Generate random embeddings for the distinct tokens + // double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10, 1, new Date().getTime()); + + // Generate random distinct tokens + List<String> strings = generateRandomStrings(rows, 10); + + // Generate the dictionary by assigning unique ID to each distinct token + writeDictToCsvFile(strings, baseDirectory + INPUT_DIR + "dict"); + + // Create the dataset by repeating and shuffling the distinct tokens + List<String> stringsColumn = shuffleAndMultiplyStrings(strings, 320); + writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR + "data"); + + //run script + programArgs = new String[]{"-stats","-args", input("embeddings"), input("data"), input("dict"), output("result")}; + runTest(true, EXCEPTION_NOT_EXPECTED, null, -1); + } + catch(Exception ex) { + throw new RuntimeException(ex); + + } + finally { + resetExecMode(rtold); + } + } + + private void runTransformTest(String testname, ExecMode rt) + { + //set runtime platform + ExecMode rtold = setExecMode(rt); + try + { + int rows = 100; + int cols = 300; + getAndLoadTestConfiguration(testname); + fullDMLScriptName = getScript(); + + // Generate random embeddings for the distinct tokens + double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10, 1, new Date().getTime()); + + // Generate random distinct tokens + List<String> strings = generateRandomStrings(rows, 10); + + // Generate the dictionary by assigning unique ID to each distinct token + Map<String,Integer> map = writeDictToCsvFile(strings, baseDirectory + INPUT_DIR + "dict"); + + // Create the dataset by repeating and shuffling the distinct tokens + List<String> stringsColumn = shuffleAndMultiplyStrings(strings, 32); + writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR + "data"); + + //run script + programArgs = new String[]{"-stats","-args", input("embeddings"), input("data"), input("dict"), output("result")}; + runTest(true, EXCEPTION_NOT_EXPECTED, null, -1); + + // Manually derive the expected result + double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn); + + // Compare results + HashMap<MatrixValue.CellIndex, Double> res_actual = readDMLMatrixFromOutputDir("result"); + double[][] resultActualDouble = TestUtils.convertHashMapToDoubleArray(res_actual); + TestUtils.compareMatrices(resultActualDouble, res_expected, 1e-6); + } + catch(Exception ex) { + throw new RuntimeException(ex); + + } + finally { + resetExecMode(rtold); + } + } + + @SuppressWarnings("unused") + private void print2DimDoubleArray(double[][] resultActualDouble) { + Arrays.stream(resultActualDouble).forEach( + e -> System.out.println(Arrays.stream(e).mapToObj(d -> String.format("%06.1f", d)) + .reduce("", (sub, elem) -> sub + " " + elem))); + } + + private void runTransformTestMultiCols(String testname, ExecMode rt) + { + //set runtime platform + ExecMode rtold = setExecMode(rt); + try + { + int rows = 100; + int cols = 100; + getAndLoadTestConfiguration(testname); + fullDMLScriptName = getScript(); + + // Generate random embeddings for the distinct tokens + double[][] a = createRandomMatrix("embeddings", rows, cols, 0, 10, 1, new Date().getTime()); + + // Generate random distinct tokens + List<String> strings = generateRandomStrings(rows, 10); + + // Generate the dictionary by assigning unique ID to each distinct token + Map<String,Integer> map = writeDictToCsvFile(strings, baseDirectory + INPUT_DIR + "dict"); + + // Create the dataset by repeating and shuffling the distinct tokens + List<String> stringsColumn = shuffleAndMultiplyStrings(strings, 10); + writeStringsToCsvFile(stringsColumn, baseDirectory + INPUT_DIR + "data"); + + //run script + programArgs = new String[]{"-stats","-args", input("embeddings"), input("data"), input("dict"), output("result"), output("result2")}; + runTest(true, EXCEPTION_NOT_EXPECTED, null, -1); + + // Manually derive the expected result + double[][] res_expected = manuallyDeriveWordEmbeddings(cols, a, map, stringsColumn); + + // Compare results + HashMap<MatrixValue.CellIndex, Double> res_actual = readDMLMatrixFromOutputDir("result"); + HashMap<MatrixValue.CellIndex, Double> res_actual2 = readDMLMatrixFromOutputDir("result2"); + double[][] resultActualDouble = TestUtils.convertHashMapToDoubleArray(res_actual); + double[][] resultActualDouble2 = TestUtils.convertHashMapToDoubleArray(res_actual2); + //System.out.println("Actual Result1 [" + resultActualDouble.length + "x" + resultActualDouble[0].length + "]:"); + ///print2DimDoubleArray(resultActualDouble); + //System.out.println("\nActual Result2 [" + resultActualDouble.length + "x" + resultActualDouble[0].length + "]:"); + //print2DimDoubleArray(resultActualDouble2); + //System.out.println("\nExpected Result [" + res_expected.length + "x" + res_expected[0].length + "]:"); + //print2DimDoubleArray(res_expected); + TestUtils.compareMatrices(resultActualDouble, res_expected, 1e-6); + TestUtils.compareMatrices(resultActualDouble, resultActualDouble2, 1e-6); + } + catch(Exception ex) { + throw new RuntimeException(ex); + + } + finally { + resetExecMode(rtold); + } + } + + private double[][] manuallyDeriveWordEmbeddings(int cols, double[][] a, Map<String, Integer> map, List<String> stringsColumn) { + // Manually derive the expected result + double[][] res_expected = new double[stringsColumn.size()][cols]; + for (int i = 0; i < stringsColumn.size(); i++) { + int rowMapped = map.get(stringsColumn.get(i)); + System.arraycopy(a[rowMapped], 0, res_expected[i], 0, cols); + } + return res_expected; + } + + @SuppressWarnings("unused") + private double[][] generateWordEmbeddings(int rows, int cols) { + double[][] a = new double[rows][cols]; + for (int i = 0; i < a.length; i++) { + for (int j = 0; j < a[i].length; j++) { + a[i][j] = cols *i + j; + } + + } + return a; + } + + public static List<String> shuffleAndMultiplyStrings(List<String> strings, int multiply){ + List<String> out = new ArrayList<>(); + Random random = new Random(); + for (int i = 0; i < strings.size()*multiply; i++) { + out.add(strings.get(random.nextInt(strings.size()))); + } + return out; + } + + public static List<String> generateRandomStrings(int numStrings, int stringLength) { + List<String> randomStrings = new ArrayList<>(); + Random random = new Random(); + String characters = "ABCDEFGHIJKLMNOPQRSTUVWXYZabcdefghijklmnopqrstuvwxyz0123456789"; + for (int i = 0; i < numStrings; i++) { + randomStrings.add(generateRandomString(random, stringLength, characters)); + } + return randomStrings; + } + + public static String generateRandomString(Random random, int stringLength, String characters){ + StringBuilder randomString = new StringBuilder(); + for (int j = 0; j < stringLength; j++) { + int randomIndex = random.nextInt(characters.length()); + randomString.append(characters.charAt(randomIndex)); + } + return randomString.toString(); + } + + public static void writeStringsToCsvFile(List<String> strings, String fileName) { + try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName))) { + for (String line : strings) { + bw.write(line); + bw.newLine(); + } + } catch (IOException e) { + e.printStackTrace(); + } + } + + public static Map<String,Integer> writeDictToCsvFile(List<String> strings, String fileName) { + try (BufferedWriter bw = new BufferedWriter(new FileWriter(fileName))) { + Map<String,Integer> map = new HashMap<>(); + for (int i = 0; i < strings.size(); i++) { + map.put(strings.get(i), i); + bw.write(strings.get(i) + Lop.DATATYPE_PREFIX + (i+1) + "\n"); + } + return map; + } catch (IOException e) { + e.printStackTrace(); + return null; + } + } }
