http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java index 005b254..67674fe 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/FrameBlock.java @@ -29,12 +29,11 @@ import java.io.Serializable; import java.lang.ref.SoftReference; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.Iterator; -import java.util.List; import java.util.Map; +import org.apache.commons.lang3.ArrayUtils; import org.apache.hadoop.io.Writable; import org.apache.sysml.lops.Lop; import org.apache.sysml.parser.Expression.ValueType; @@ -61,29 +60,32 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable private int _numRows = -1; /** The schema of the data frame as an ordered list of value types */ - private List<ValueType> _schema = null; + private ValueType[] _schema = null; /** The column names of the data frame as an ordered list of strings */ - private List<String> _colnames = null; + private String[] _colnames = null; - private List<ColumnMetadata> _colmeta = null; + private ColumnMetadata[] _colmeta = null; /** The data frame data as an ordered list of columns */ - private List<Array> _coldata = null; + private Array[] _coldata = null; /** Cache for recode maps from frame meta data, indexed by column 0-based */ private Map<Integer, SoftReference<HashMap<String,Long>>> _rcdMapCache = null; public FrameBlock() { _numRows = 0; - _schema = new ArrayList<ValueType>(); - _colnames = new ArrayList<String>(); - _colmeta = new ArrayList<ColumnMetadata>(); - _coldata = new ArrayList<Array>(); if( REUSE_RECODE_MAPS ) _rcdMapCache = new HashMap<Integer, SoftReference<HashMap<String,Long>>>(); } + /** + * Copy constructor for frame blocks, which uses a shallow copy for + * the schema (column types and names) but a deep copy for meta data + * and actual column data. + * + * @param that + */ public FrameBlock(FrameBlock that) { this(that.getSchema(), that.getColumnNames()); copy(that); @@ -92,32 +94,32 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable public FrameBlock(int ncols, ValueType vt) { this(); - _schema.addAll(Collections.nCopies(ncols, vt)); + _schema = UtilFunctions.nCopies(ncols, vt); _colnames = createColNames(ncols); + _colmeta = new ColumnMetadata[ncols]; for( int j=0; j<ncols; j++ ) - _colmeta.add(new ColumnMetadata(0)); + _colmeta[j] = new ColumnMetadata(0); } - public FrameBlock(List<ValueType> schema) { + public FrameBlock(ValueType[] schema) { this(schema, new String[0][]); } - public FrameBlock(List<ValueType> schema, List<String> names) { + public FrameBlock(ValueType[] schema, String[] names) { this(schema, names, new String[0][]); } - public FrameBlock(List<ValueType> schema, String[][] data) { - this(schema, createColNames(schema.size()), data); + public FrameBlock(ValueType[] schema, String[][] data) { + this(schema, createColNames(schema.length), data); } - public FrameBlock(List<ValueType> schema, List<String> names, String[][] data) { + public FrameBlock(ValueType[] schema, String[] names, String[][] data) { _numRows = 0; //maintained on append - _schema = new ArrayList<ValueType>(schema); - _colnames = new ArrayList<String>(names); - _colmeta = new ArrayList<ColumnMetadata>(); - for( int j=0; j<_schema.size(); j++ ) - _colmeta.add(new ColumnMetadata(0)); - _coldata = new ArrayList<Array>(); + _schema = schema; + _colnames = names; + _colmeta = new ColumnMetadata[_schema.length]; + for( int j=0; j<_schema.length; j++ ) + _colmeta[j] = new ColumnMetadata(0); for( int i=0; i<data.length; i++ ) appendRow(data[i]); if( REUSE_RECODE_MAPS ) @@ -148,7 +150,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @return */ public int getNumColumns() { - return _schema.size(); + return _schema.length; } /** @@ -156,7 +158,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * * @return */ - public List<ValueType> getSchema() { + public ValueType[] getSchema() { return _schema; } @@ -165,9 +167,9 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * * @return */ - public void setSchema(List<ValueType> schema) { + public void setSchema(ValueType[] schema) { _schema = schema; - _colnames = createColNames(schema.size()); + _colnames = createColNames(schema.length); } /** @@ -175,7 +177,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * * @return */ - public List<String> getColumnNames() { + public String[] getColumnNames() { return _colnames; } @@ -183,7 +185,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * * @param colnames */ - public void setColumnNames(List<String> colnames) { + public void setColumnNames(String[] colnames) { _colnames = colnames; } @@ -191,7 +193,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * * @return */ - public List<ColumnMetadata> getColumnMetadata() { + public ColumnMetadata[] getColumnMetadata() { return _colmeta; } @@ -201,7 +203,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @return */ public ColumnMetadata getColumnMetadata(int c) { - return _colmeta.get(c); + return _colmeta[c]; } /** @@ -221,16 +223,16 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @return */ public boolean isColumnMetadataDefault(int c) { - return _colmeta.get(c).getMvValue() == null - && _colmeta.get(c).getNumDistinct() == 0; + return _colmeta[c].getMvValue() == null + && _colmeta[c].getNumDistinct() == 0; } /** * * @param colmeta */ - public void setColumnMetadata(List<ColumnMetadata> colmeta) { - _colmeta = new ArrayList<FrameBlock.ColumnMetadata>(colmeta); + public void setColumnMetadata(ColumnMetadata[] colmeta) { + System.arraycopy(colmeta, 0, _colmeta, 0, _colmeta.length); } /** @@ -239,7 +241,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @param colmeta */ public void setColumnMetadata(int c, ColumnMetadata colmeta) { - _colmeta.set(c, colmeta); + _colmeta[c] = colmeta; } /** @@ -251,7 +253,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable public Map<String,Integer> getColumnNameIDMap() { Map<String, Integer> ret = new HashMap<String, Integer>(); for( int j=0; j<getNumColumns(); j++ ) - ret.put(_colnames.get(j), j+1); + ret.put(_colnames[j], j+1); return ret; } @@ -261,21 +263,24 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable */ public void ensureAllocatedColumns(int numRows) { //early abort if already allocated - if( _schema.size() == _coldata.size() ) + if( _coldata != null && _schema.length == _coldata.length ) return; - //allocate column meta data - for( int j=0; j<_schema.size(); j++ ) - _colmeta.add(new ColumnMetadata(0)); + //allocate column meta data if necessary + if( _colmeta == null || _schema.length != _colmeta.length ) { + _colmeta = new ColumnMetadata[_schema.length]; + for( int j=0; j<_schema.length; j++ ) + _colmeta[j] = new ColumnMetadata(0); + } //allocate columns if necessary - for( int j=0; j<_schema.size(); j++ ) { - if( j >= _coldata.size() ) - switch( _schema.get(j) ) { - case STRING: _coldata.add(new StringArray(new String[numRows])); break; - case BOOLEAN: _coldata.add(new BooleanArray(new boolean[numRows])); break; - case INT: _coldata.add(new LongArray(new long[numRows])); break; - case DOUBLE: _coldata.add(new DoubleArray(new double[numRows])); break; - default: throw new RuntimeException("Unsupported value type: "+_schema.get(j)); - } + _coldata = new Array[_schema.length]; + for( int j=0; j<_schema.length; j++ ) { + switch( _schema[j] ) { + case STRING: _coldata[j] = new StringArray(new String[numRows]); break; + case BOOLEAN: _coldata[j] = new BooleanArray(new boolean[numRows]); break; + case INT: _coldata[j] = new LongArray(new long[numRows]); break; + case DOUBLE: _coldata[j] = new DoubleArray(new double[numRows]); break; + default: throw new RuntimeException("Unsupported value type: "+_schema[j]); + } } _numRows = numRows; } @@ -286,7 +291,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @param newlen */ public void ensureColumnCompatibility(int newlen) { - if( _coldata.size() > 0 && _numRows != newlen ) + if( _coldata!=null && _coldata.length > 0 && _numRows != newlen ) throw new RuntimeException("Mismatch in number of rows: "+newlen+" (expected: "+_numRows+")"); } @@ -295,10 +300,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @param size * @return */ - public static List<String> createColNames(int size) { - ArrayList<String> ret = new ArrayList<String>(size); + public static String[] createColNames(int size) { + String[] ret = new String[size]; for( int i=1; i<=size; i++ ) - ret.add(createColName(i)); + ret[i-1] = createColName(i); return ret; } @@ -328,7 +333,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @return */ public boolean isColNameDefault(int i) { - return _colnames.get(i).equals("C"+i); + return _colnames[i].equals("C"+(i+1)); } /** @@ -339,7 +344,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable int card = 0; for( int i=0; i<getNumRows(); i++ ) card += (get(i, j) != null) ? 1 : 0; - _colmeta.get(j).setNumDistinct(card); + _colmeta[j].setNumDistinct(card); } } @@ -354,7 +359,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @return */ public Object get(int r, int c) { - return _coldata.get(c).get(r); + return _coldata[c].get(r); } /** @@ -366,7 +371,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @param val */ public void set(int r, int c, Object val) { - _coldata.get(c).set(r, UtilFunctions.objectToObject(_schema.get(c), val)); + _coldata[c].set(r, UtilFunctions.objectToObject(_schema[c], val)); } /** @@ -376,17 +381,17 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable */ public void reset(int nrow, boolean clearMeta) { if( clearMeta ) { - getSchema().clear(); - getColumnNames().clear(); + _schema = null; + _colnames = null; if( _colmeta != null ) { - for( int i=0; i<_colmeta.size(); i++ ) + for( int i=0; i<_colmeta.length; i++ ) if( !isColumnMetadataDefault(i) ) - _colmeta.set(i, new ColumnMetadata(0)); + _colmeta[i] = new ColumnMetadata(0); } } if(_coldata != null) { - for( int i=0; i < _coldata.size(); i++ ) - _coldata.get(i)._size = nrow; + for( int i=0; i < _coldata.length; i++ ) + _coldata[i]._size = nrow; } } @@ -407,7 +412,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable public void appendRow(Object[] row) { ensureAllocatedColumns(0); for( int j=0; j<row.length; j++ ) - _coldata.get(j).append(row[j]); + _coldata[j].append(row[j]); _numRows++; } @@ -420,7 +425,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable public void appendRow(String[] row) { ensureAllocatedColumns(0); for( int j=0; j<row.length; j++ ) - _coldata.get(j).append(row[j]); + _coldata[j].append(row[j]); _numRows++; } @@ -433,9 +438,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable */ public void appendColumn(String[] col) { ensureColumnCompatibility(col.length); - _schema.add(ValueType.STRING); - _colnames.add(createColName(_schema.size())); - _coldata.add(new StringArray(col)); + _schema = ArrayUtils.add(_schema, ValueType.STRING); + _colnames = ArrayUtils.add(_colnames, createColName(_schema.length)); + _coldata = (_coldata==null) ? new Array[]{new StringArray(col)} : + ArrayUtils.add(_coldata, new StringArray(col)); _numRows = col.length; } @@ -448,9 +454,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable */ public void appendColumn(boolean[] col) { ensureColumnCompatibility(col.length); - _schema.add(ValueType.BOOLEAN); - _colnames.add(createColName(_schema.size())); - _coldata.add(new BooleanArray(col)); + _schema = ArrayUtils.add(_schema, ValueType.BOOLEAN); + _colnames = ArrayUtils.add(_colnames, createColName(_schema.length)); + _coldata = (_coldata==null) ? new Array[]{new BooleanArray(col)} : + ArrayUtils.add(_coldata, new BooleanArray(col)); _numRows = col.length; } @@ -463,9 +470,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable */ public void appendColumn(long[] col) { ensureColumnCompatibility(col.length); - _schema.add(ValueType.INT); - _colnames.add(createColName(_schema.size())); - _coldata.add(new LongArray(col)); + _schema = ArrayUtils.add(_schema, ValueType.INT); + _colnames = ArrayUtils.add(_colnames, createColName(_schema.length)); + _coldata = (_coldata==null) ? new Array[]{new LongArray(col)} : + ArrayUtils.add(_coldata, new LongArray(col)); _numRows = col.length; } @@ -478,9 +486,10 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable */ public void appendColumn(double[] col) { ensureColumnCompatibility(col.length); - _schema.add(ValueType.DOUBLE); - _colnames.add(createColName(_schema.size())); - _coldata.add(new DoubleArray(col)); + _schema = ArrayUtils.add(_schema, ValueType.DOUBLE); + _colnames = ArrayUtils.add(_colnames, createColName(_schema.length)); + _coldata = (_coldata==null) ? new Array[]{new DoubleArray(col)} : + ArrayUtils.add(_coldata, new DoubleArray(col)); _numRows = col.length; } @@ -490,11 +499,11 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable * @return */ public Object getColumn(int c) { - switch(_schema.get(c)) { - case STRING: return ((StringArray)_coldata.get(c))._data; - case BOOLEAN: return ((BooleanArray)_coldata.get(c))._data; - case INT: return ((LongArray)_coldata.get(c))._data; - case DOUBLE: return ((DoubleArray)_coldata.get(c))._data; + switch(_schema[c]) { + case STRING: return ((StringArray)_coldata[c])._data; + case BOOLEAN: return ((BooleanArray)_coldata[c])._data; + case INT: return ((LongArray)_coldata[c])._data; + case DOUBLE: return ((DoubleArray)_coldata[c])._data; default: return null; } } @@ -556,14 +565,14 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable out.writeBoolean(isDefaultMeta); //write columns (value type, data) for( int j=0; j<getNumColumns(); j++ ) { - out.writeByte(_schema.get(j).ordinal()); + out.writeByte(_schema[j].ordinal()); if( !isDefaultMeta ) { - out.writeUTF(_colnames.get(j)); - out.writeLong(_colmeta.get(j).getNumDistinct()); - out.writeUTF( (_colmeta.get(j).getMvValue()!=null) ? - _colmeta.get(j).getMvValue() : "" ); + out.writeUTF(_colnames[j]); + out.writeLong(_colmeta[j].getNumDistinct()); + out.writeUTF( (_colmeta[j].getMvValue()!=null) ? + _colmeta[j].getMvValue() : "" ); } - _coldata.get(j).write(out); + _coldata[j].write(out); } } @@ -573,10 +582,16 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable _numRows = in.readInt(); int numCols = in.readInt(); boolean isDefaultMeta = in.readBoolean(); + //allocate schema/meta data arrays + _schema = (_schema!=null && _schema.length==numCols) ? + _schema : new ValueType[numCols]; + _colnames = (_colnames != null && _colnames.length==numCols) ? + _colnames : new String[numCols]; + _colmeta = (_colmeta != null && _colmeta.length==numCols) ? + _colmeta : new ColumnMetadata[numCols]; + _coldata = (_coldata!=null && _coldata.length==numCols) ? + _coldata : new Array[numCols]; //read columns (value type, meta, data) - _schema.clear(); - _colmeta.clear(); - _coldata.clear(); for( int j=0; j<numCols; j++ ) { ValueType vt = ValueType.values()[in.readByte()]; String name = isDefaultMeta ? createColName(j) : in.readUTF(); @@ -591,11 +606,11 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable default: throw new IOException("Unsupported value type: "+vt); } arr.readFields(in); - _schema.add(vt); - _colnames.add(name); - _colmeta.add(new ColumnMetadata(ndistinct, - (mvvalue==null || mvvalue.isEmpty()) ? null : mvvalue)); - _coldata.add(arr); + _schema[j] = vt; + _colnames[j] = name; + _colmeta[j] = new ColumnMetadata(ndistinct, + (mvvalue==null || mvvalue.isEmpty()) ? null : mvvalue); + _coldata[j] = arr; } } @@ -678,16 +693,17 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable if( ret == null ) ret = new FrameBlock(); ret._numRows = _numRows; - ret._schema = new ArrayList<ValueType>(_schema); - ret._colnames = new ArrayList<String>(_colnames); - ret._colmeta = new ArrayList<ColumnMetadata>(_colmeta); + ret._schema = _schema.clone(); + ret._colnames = _colnames.clone(); + ret._colmeta = _colmeta.clone(); + ret._coldata = new Array[getNumColumns()]; //copy data to output and partial overwrite w/ rhs for( int j=0; j<getNumColumns(); j++ ) { - Array tmp = _coldata.get(j).clone(); + Array tmp = _coldata[j].clone(); if( j>=cl && j<=cu ) - tmp.set(rl, ru, rhsFrame._coldata.get(j-cl)); - ret._coldata.add(tmp); + tmp.set(rl, ru, rhsFrame._coldata[j-cl]); + ret._coldata[j] = tmp; } return ret; @@ -737,20 +753,27 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable ret.reset(ru-rl+1, true); //copy output schema and colnames + int numCols = cu-cl+1; + ret._schema = new ValueType[numCols]; + ret._colnames = new String[numCols]; + ret._colmeta = new ColumnMetadata[numCols]; + for( int j=cl; j<=cu; j++ ) { - ret._schema.add(_schema.get(j)); - ret._colnames.add(_colnames.get(j)); - ret._colmeta.add(_colmeta.get(j)); + ret._schema[j-cl] = _schema[j]; + ret._colnames[j-cl] = _colnames[j]; + ret._colmeta[j-cl] = _colmeta[j]; } ret._numRows = ru-rl+1; //copy output data - if(ret._coldata.size() == 0) + if(ret._coldata == null ) { + ret._coldata = new Array[numCols]; for( int j=cl; j<=cu; j++ ) - ret._coldata.add(_coldata.get(j).slice(rl,ru)); + ret._coldata[j-cl] = _coldata[j].slice(rl,ru); + } else for( int j=cl; j<=cu; j++ ) - ret._coldata.get(j-cl).set(0, ru-rl, _coldata.get(j), rl); + ret._coldata[j-cl].set(0, ru-rl, _coldata[j], rl); return ret; } @@ -818,18 +841,14 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable ret._numRows = _numRows; //concatenate schemas (w/ deep copy to prevent side effects) - ret._schema = new ArrayList<ValueType>(_schema); - ret._schema.addAll(that._schema); - ret._colnames = new ArrayList<String>(_colnames); - ret._colnames.addAll(that._colnames); - ret._colmeta = new ArrayList<ColumnMetadata>(_colmeta); - ret._colmeta.addAll(that._colmeta); + ret._schema = ArrayUtils.addAll(_schema, that._schema); + ret._colnames = ArrayUtils.addAll(_colnames, that._colnames); + ret._colmeta = ArrayUtils.addAll(_colmeta, that._colmeta); //concatenate column data (w/ deep copy to prevent side effects) - for( Array tmp : _coldata ) - ret._coldata.add(tmp.clone()); - for( Array tmp : that._coldata ) - ret._coldata.add(tmp.clone()); + ret._coldata = ArrayUtils.addAll(_coldata, that._coldata); + for( int i=0; i<ret._coldata.length; i++ ) + ret._coldata[i] = ret._coldata[i].clone(); } else //ROW APPEND { @@ -843,12 +862,13 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable if( ret == null ) ret = new FrameBlock(); ret._numRows = _numRows; - ret._schema = new ArrayList<ValueType>(_schema); - ret._colnames = new ArrayList<String>(_colnames); + ret._schema = _schema.clone(); + ret._colnames = _colnames.clone(); //concatenate data (deep copy first, append second) - for( Array tmp : _coldata ) - ret._coldata.add(tmp.clone()); + ret._coldata = new Array[_coldata.length]; + for( int j=0; j<_coldata.length; j++ ) + ret._coldata[j] = _coldata[j].clone(); Iterator<Object[]> iter = that.getObjectRowIterator(); while( iter.hasNext() ) ret.appendRow(iter.next()); @@ -882,13 +902,13 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable //copy values for( int j=cl; j<=cu; j++ ) { //special case: column memcopy - if( _schema.get(j).equals(src._schema.get(j-cl)) ) - _coldata.get(j).set(rl, ru, src._coldata.get(j-cl)); + if( _schema[j].equals(src._schema[j-cl]) ) + _coldata[j].set(rl, ru, src._coldata[j-cl]); //general case w/ schema transformation else for( int i=rl; i<=ru; i++ ) { String tmp = src.get(i-rl, j-cl)!=null ? src.get(i-rl, j-cl).toString() : null; - set(i, j, UtilFunctions.stringToObject(_schema.get(j), tmp)); + set(i, j, UtilFunctions.stringToObject(_schema[j], tmp)); } } } @@ -912,7 +932,7 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable //construct recode map HashMap<String,Long> map = new HashMap<String,Long>(); - Array ldata = _coldata.get(col); + Array ldata = _coldata[col]; for( int i=0; i<getNumRows(); i++ ) { Object val = ldata.get(i); if( val != null ) { @@ -955,21 +975,21 @@ public class FrameBlock implements Writable, CacheBlock, Externalizable //meta data copy if necessary for( int j=0; j<getNumColumns(); j++ ) if( !that.isColumnMetadataDefault(j) ) { - _colmeta.get(j).setNumDistinct(that._colmeta.get(j).getNumDistinct()); - _colmeta.get(j).setMvValue(that._colmeta.get(j).getMvValue()); + _colmeta[j].setNumDistinct(that._colmeta[j].getNumDistinct()); + _colmeta[j].setMvValue(that._colmeta[j].getMvValue()); } //core frame block merge through cell copy //with column-wide access pattern for( int j=0; j<getNumColumns(); j++ ) { //special case: copy non-zeros of column - if( _schema.get(j).equals(that._schema.get(j)) ) - _coldata.get(j).setNz(0, _numRows-1, that._coldata.get(j)); + if( _schema[j].equals(that._schema[j]) ) + _coldata[j].setNz(0, _numRows-1, that._coldata[j]); //general case w/ schema transformation else { for( int i=0; i<_numRows; i++ ) { Object obj = UtilFunctions.objectToObject( - getSchema().get(j), that.get(i,j), true); + _schema[j], that.get(i,j), true); if (obj != null) //merge non-zeros set(i, j,obj); }
http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java b/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java index f2c7ecb..bd457ca 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/data/OperationsOnMatrixValues.java @@ -21,8 +21,7 @@ package org.apache.sysml.runtime.matrix.data; import java.util.ArrayList; -import java.util.Collections; -import java.util.List; +import java.util.Arrays; import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.DMLRuntimeException; @@ -569,7 +568,7 @@ public class OperationsOnMatrixValues //allocate space for the output value for(long r=resultBlockIndexTop; r<=resultBlockIndexBottom; r++) { - List<ValueType> schema = UtilFunctions.getSubSchema(block.getSchema(), tmpRange.colStart, tmpRange.colEnd); + ValueType[] schema = Arrays.copyOfRange(block.getSchema(), (int)tmpRange.colStart, (int)tmpRange.colEnd+1); long iResultIndex = Math.max(((r-1)*brlen - ixrange.rowStart + 1), 0); Pair<Long,FrameBlock> out=new Pair<Long,FrameBlock>(new Long(iResultIndex+1), new FrameBlock(schema)); outlist.add(out); @@ -631,12 +630,11 @@ public class OperationsOnMatrixValues int lbclen = clenLeft; - List<ValueType> schemaPartialLeft = Collections.nCopies(lhs_lcl, ValueType.STRING); - List<ValueType> schemaRHS = UtilFunctions.getSubSchema(fb.getSchema(), rhs_lcl, rhs_lcl-lhs_lcl+lhs_lcu); - List<ValueType> schema = new ArrayList<ValueType>(schemaPartialLeft); - schema.addAll(schemaRHS); - List<ValueType> schemaPartialRight = Collections.nCopies(lbclen-schema.size(), ValueType.STRING); - schema.addAll(schemaPartialRight); + ValueType[] schemaPartialLeft = UtilFunctions.nCopies(lhs_lcl, ValueType.STRING); + ValueType[] schemaRHS = Arrays.copyOfRange(fb.getSchema(), (int)(rhs_lcl), (int)(rhs_lcl-lhs_lcl+lhs_lcu+1)); + ValueType[] schema = UtilFunctions.copyOf(schemaPartialLeft, schemaRHS); + ValueType[] schemaPartialRight = UtilFunctions.nCopies(lbclen-schema.length, ValueType.STRING); + schema = UtilFunctions.copyOf(schema, schemaPartialRight); FrameBlock resultBlock = new FrameBlock(schema); int iRHSRows = (int)(leftRowIndex<=rlen/brlenLeft?brlenLeft:rlen-(rlen/brlenLeft)*brlenLeft); resultBlock.ensureAllocatedColumns(iRHSRows); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java b/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java index e844be1..41615c5 100644 --- a/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java +++ b/src/main/java/org/apache/sysml/runtime/matrix/mapred/FrameReblockBuffer.java @@ -24,7 +24,6 @@ import java.io.IOException; import java.util.ArrayList; import java.util.Arrays; import java.util.Comparator; -import java.util.List; import org.apache.hadoop.io.Writable; import org.apache.hadoop.mapred.OutputCollector; @@ -54,7 +53,7 @@ public class FrameReblockBuffer private int _brlen = -1; private int _bclen = -1; - private List<ValueType> _schema; + private ValueType[] _schema; /** @@ -64,7 +63,7 @@ public class FrameReblockBuffer * @return * */ - public FrameReblockBuffer( long rlen, long clen, List<ValueType> schema ) + public FrameReblockBuffer( long rlen, long clen, ValueType[] schema ) { this( DEFAULT_BUFFER_SIZE, rlen, clen, schema ); } @@ -77,7 +76,7 @@ public class FrameReblockBuffer * @return * */ - public FrameReblockBuffer( int buffersize, long rlen, long clen, List<ValueType> schema ) + public FrameReblockBuffer( int buffersize, long rlen, long clen, ValueType[] schema ) { _bufflen = buffersize; _count = 0; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java b/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java index ee0e56a..46090c0 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java +++ b/src/main/java/org/apache/sysml/runtime/transform/BinAgent.java @@ -68,7 +68,7 @@ public class BinAgent extends Encoder super( null, clen ); } - public BinAgent(JSONObject parsedSpec, List<String> colnames, int clen) + public BinAgent(JSONObject parsedSpec, String[] colnames, int clen) throws JSONException, IOException { this(parsedSpec, colnames, clen, false); @@ -81,7 +81,7 @@ public class BinAgent extends Encoder * @throws JSONException * @throws IOException */ - public BinAgent(JSONObject parsedSpec, List<String> colnames, int clen, boolean colsOnly) + public BinAgent(JSONObject parsedSpec, String[] colnames, int clen, boolean colsOnly) throws JSONException, IOException { super( null, clen ); @@ -364,7 +364,7 @@ public class BinAgent extends Encoder int colID = _colList[j]; for( int i=0; i<in.getNumRows(); i++ ) { double inVal = UtilFunctions.objectToDouble( - in.getSchema().get(colID-1), in.get(i, colID-1)); + in.getSchema()[colID-1], in.get(i, colID-1)); int ix = Arrays.binarySearch(_binMaxs[j], inVal); int binID = ((ix < 0) ? Math.abs(ix+1) : ix) + 1; out.quickSetValue(i, colID-1, binID); @@ -384,7 +384,7 @@ public class BinAgent extends Encoder _binMaxs = new double[_colList.length][]; for( int j=0; j<_colList.length; j++ ) { int colID = _colList[j]; //1-based - int nbins = (int)meta.getColumnMetadata().get(colID-1).getNumDistinct(); + int nbins = (int)meta.getColumnMetadata()[colID-1].getNumDistinct(); _binMins[j] = new double[nbins]; _binMaxs[j] = new double[nbins]; for( int i=0; i<nbins; i++ ) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java b/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java index b51d639..e0b4826 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java +++ b/src/main/java/org/apache/sysml/runtime/transform/DummycodeAgent.java @@ -64,7 +64,7 @@ public class DummycodeAgent extends Encoder super(list, clen); } - public DummycodeAgent(JSONObject parsedSpec, List<String> colnames, int clen) throws JSONException { + public DummycodeAgent(JSONObject parsedSpec, String[] colnames, int clen) throws JSONException { super(null, clen); if ( parsedSpec.containsKey(TfUtils.TXMETHOD_DUMMYCODE) ) { @@ -443,7 +443,7 @@ public class DummycodeAgent extends Encoder idx++; } else { - double ptval = UtilFunctions.objectToDouble(in.getSchema().get(colID-1), in.get(i, colID-1)); + double ptval = UtilFunctions.objectToDouble(in.getSchema()[colID-1], in.get(i, colID-1)); ret.quickSetValue(i, ncolID-1, ptval); ncolID++; } @@ -465,7 +465,7 @@ public class DummycodeAgent extends Encoder _dummycodedLength = _clen; for( int j=0; j<_colList.length; j++ ) { int colID = _colList[j]; //1-based - _domainSizes[j] = (int)meta.getColumnMetadata().get(colID-1).getNumDistinct(); + _domainSizes[j] = (int)meta.getColumnMetadata()[colID-1].getNumDistinct(); _dummycodedLength += _domainSizes[j]-1; } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java b/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java index 1266ced..4ff93a5 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java +++ b/src/main/java/org/apache/sysml/runtime/transform/MVImputeAgent.java @@ -98,7 +98,7 @@ public class MVImputeAgent extends Encoder public KahanObject[] getMeans_scnomv() { return _scnomvMeanList; } public CM_COV_Object[] getVars_scnomv() { return _scnomvVarList; } - public MVImputeAgent(JSONObject parsedSpec, List<String> colnames, int clen) + public MVImputeAgent(JSONObject parsedSpec, String[] colnames, int clen) throws JSONException { super(null, clen); @@ -114,7 +114,7 @@ public class MVImputeAgent extends Encoder _hist = new HashMap<Integer, HashMap<String,Long>>(); } - public MVImputeAgent(JSONObject parsedSpec, String[] NAstrings, int clen) + public MVImputeAgent(JSONObject parsedSpec, String[] colnames, String[] NAstrings, int clen) throws JSONException { super(null, clen); @@ -947,7 +947,7 @@ public class MVImputeAgent extends Encoder long off = _countList[j]; for( int i=0; i<in.getNumRows(); i++ ) _meanFn.execute2(_meanList[j], UtilFunctions.objectToDouble( - in.getSchema().get(colID-1), in.get(i, colID-1)), off+i+1); + in.getSchema()[colID-1], in.get(i, colID-1)), off+i+1); _replacementList[j] = String.valueOf(_meanList[j]._sum); _countList[j] += in.getNumRows(); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java b/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java index de6d59f..982f4b9 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java +++ b/src/main/java/org/apache/sysml/runtime/transform/OmitAgent.java @@ -21,7 +21,6 @@ package org.apache.sysml.runtime.transform; import java.io.IOException; import java.util.Iterator; -import java.util.List; import org.apache.hadoop.fs.FileSystem; import org.apache.hadoop.fs.Path; @@ -50,7 +49,7 @@ public class OmitAgent extends Encoder super(list, clen); } - public OmitAgent(JSONObject parsedSpec, List<String> colnames, int clen) + public OmitAgent(JSONObject parsedSpec, String[] colnames, int clen) throws JSONException { super(null, clen); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java b/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java index edfdff4..8ec2db3 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java +++ b/src/main/java/org/apache/sysml/runtime/transform/RecodeAgent.java @@ -59,7 +59,7 @@ public class RecodeAgent extends Encoder private HashMap<Integer, HashMap<String, Long>> _rcdMaps = new HashMap<Integer, HashMap<String, Long>>(); private HashMap<Integer, HashMap<String,String>> _finalMaps = null; - public RecodeAgent(JSONObject parsedSpec, List<String> colnames, int clen) + public RecodeAgent(JSONObject parsedSpec, String[] colnames, int clen) throws JSONException { super(null, clen); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java b/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java index dd18b43..7a5da65 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java +++ b/src/main/java/org/apache/sysml/runtime/transform/TfUtils.java @@ -23,7 +23,6 @@ import java.io.EOFException; import java.io.IOException; import java.io.Serializable; import java.util.Arrays; -import java.util.List; import java.util.regex.Pattern; import org.apache.hadoop.filecache.DistributedCache; @@ -250,13 +249,11 @@ public class TfUtils implements Serializable{ private void createAgents(JSONObject spec, String[] naStrings) throws IOException, JSONException { - List<String> colnames = Arrays.asList(_outputColumnNames); - - _oa = new OmitAgent(spec, colnames, _numInputCols); - _mia = new MVImputeAgent(spec, naStrings, _numInputCols); - _ra = new RecodeAgent(spec, colnames, _numInputCols); - _ba = new BinAgent(spec, colnames, _numInputCols); - _da = new DummycodeAgent(spec, colnames, _numInputCols); + _oa = new OmitAgent(spec, _outputColumnNames, _numInputCols); + _mia = new MVImputeAgent(spec, null, naStrings, _numInputCols); + _ra = new RecodeAgent(spec, _outputColumnNames, _numInputCols); + _ba = new BinAgent(spec, _outputColumnNames, _numInputCols); + _da = new DummycodeAgent(spec, _outputColumnNames, _numInputCols); } public void setupAgents(OmitAgent oa, MVImputeAgent mia, RecodeAgent ra, BinAgent ba, DummycodeAgent da) { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java index 2916742..3495cf3 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java +++ b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderDummycode.java @@ -52,7 +52,7 @@ public class DecoderDummycode extends Decoder if( in.quickGetValue(i, k-1) != 0 ) { int col = _colList[j] - 1; out.set(i, col, UtilFunctions.doubleToObject( - out.getSchema().get(col), k-_clPos[j]+1)); + out.getSchema()[col], k-_clPos[j]+1)); } return out; } @@ -63,8 +63,8 @@ public class DecoderDummycode extends Decoder _cuPos = new int[_colList.length]; //col upper pos for( int j=0, off=0; j<_colList.length; j++ ) { int colID = _colList[j]; - int ndist = (int)meta.getColumnMetadata() - .get(colID-1).getNumDistinct(); + int ndist = (int)meta.getColumnMetadata()[colID-1] + .getNumDistinct(); _clPos[j] = off + colID; _cuPos[j] = _clPos[j] + ndist; off += ndist - 1; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java index f276015..facfff8 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java +++ b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderFactory.java @@ -46,7 +46,7 @@ public class DecoderFactory * @throws DMLRuntimeException */ @SuppressWarnings("unchecked") - public static Decoder createDecoder(String spec, List<String> colnames, List<ValueType> schema, FrameBlock meta) + public static Decoder createDecoder(String spec, String[] colnames, List<ValueType> schema, FrameBlock meta) throws DMLRuntimeException { Decoder decoder = null; http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java index d2bf7fa..1ee0568 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java +++ b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderPassThrough.java @@ -70,8 +70,8 @@ public class DecoderPassThrough extends Decoder ix1 ++; } else { //_colList[ix1] > _dcCols[ix2] - off += (int)meta.getColumnMetadata() - .get(_dcCols[ix2]-1).getNumDistinct() - 1; + off += (int)meta.getColumnMetadata()[_dcCols[ix2]-1] + .getNumDistinct() - 1; ix2 ++; } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java index 5484ded..42a0da9 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java +++ b/src/main/java/org/apache/sysml/runtime/transform/decode/DecoderRecode.java @@ -54,7 +54,7 @@ public class DecoderRecode extends Decoder for( int j=0; j<_colList.length; j++ ) { int colID = _colList[j]; double val = UtilFunctions.objectToDouble( - out.getSchema().get(colID-1), out.get(i, colID-1)); + out.getSchema()[colID-1], out.get(i, colID-1)); long key = UtilFunctions.toLong(val); out.set(i, colID-1, _rcMaps[j].get(key)); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java index 8adea7b..b71f563 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java +++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderFactory.java @@ -21,7 +21,6 @@ package org.apache.sysml.runtime.transform.encode; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.List; import org.apache.commons.collections.CollectionUtils; @@ -48,8 +47,8 @@ public class EncoderFactory * @return * @throws DMLRuntimeException */ - public static Encoder createEncoder(String spec, List<String> colnames, int clen, FrameBlock meta) throws DMLRuntimeException { - return createEncoder(spec, colnames, Collections.nCopies(clen, ValueType.STRING), meta); + public static Encoder createEncoder(String spec, String[] colnames, int clen, FrameBlock meta) throws DMLRuntimeException { + return createEncoder(spec, colnames, UtilFunctions.nCopies(clen, ValueType.STRING), meta); } /** @@ -61,8 +60,8 @@ public class EncoderFactory * @return * @throws DMLRuntimeException */ - public static Encoder createEncoder(String spec, List<String> colnames, List<ValueType> schema, int clen, FrameBlock meta) throws DMLRuntimeException { - List<ValueType> lschema = (schema==null) ? Collections.nCopies(clen, ValueType.STRING) : schema; + public static Encoder createEncoder(String spec, String[] colnames, ValueType[] schema, int clen, FrameBlock meta) throws DMLRuntimeException { + ValueType[] lschema = (schema==null) ? UtilFunctions.nCopies(clen, ValueType.STRING) : schema; return createEncoder(spec, colnames, lschema, meta); } @@ -75,11 +74,11 @@ public class EncoderFactory * @throws DMLRuntimeException */ @SuppressWarnings("unchecked") - public static Encoder createEncoder(String spec, List<String> colnames, List<ValueType> schema, FrameBlock meta) + public static Encoder createEncoder(String spec, String[] colnames, ValueType[] schema, FrameBlock meta) throws DMLRuntimeException { Encoder encoder = null; - int clen = schema.size(); + int clen = schema.length; try { //parse transform specification @@ -111,13 +110,13 @@ public class EncoderFactory lencoders.add(new EncoderPassThrough( ArrayUtils.toPrimitive(ptIDs.toArray(new Integer[0])), clen)); if( !dcIDs.isEmpty() ) - lencoders.add(new DummycodeAgent(jSpec, colnames, schema.size())); + lencoders.add(new DummycodeAgent(jSpec, colnames, schema.length)); if( !binIDs.isEmpty() ) - lencoders.add(new BinAgent(jSpec, colnames, schema.size(), true)); + lencoders.add(new BinAgent(jSpec, colnames, schema.length, true)); if( !oIDs.isEmpty() ) - lencoders.add(new OmitAgent(jSpec, colnames, schema.size())); + lencoders.add(new OmitAgent(jSpec, colnames, schema.length)); if( !mvIDs.isEmpty() ) { - MVImputeAgent ma = new MVImputeAgent(jSpec, colnames, schema.size()); + MVImputeAgent ma = new MVImputeAgent(jSpec, colnames, schema.length); ma.initRecodeIDList(rcIDs); lencoders.add(ma); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java index ab146ce..08722fd 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java +++ b/src/main/java/org/apache/sysml/runtime/transform/encode/EncoderPassThrough.java @@ -67,7 +67,7 @@ public class EncoderPassThrough extends Encoder public MatrixBlock apply(FrameBlock in, MatrixBlock out) { for( int j=0; j<_colList.length; j++ ) { int col = _colList[j]-1; - ValueType vt = in.getSchema().get(col); + ValueType vt = in.getSchema()[col]; for( int i=0; i<in.getNumRows(); i++ ) { Object val = in.get(i, col); out.quickSetValue(i, col, (val==null||(vt==ValueType.STRING http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java b/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java index de883f3..d12ff1d 100644 --- a/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java +++ b/src/main/java/org/apache/sysml/runtime/transform/meta/TfMetaUtils.java @@ -27,11 +27,12 @@ import java.io.InputStream; import java.io.InputStreamReader; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.List; import java.util.Map.Entry; +import jodd.util.ArraysUtil; + import org.apache.commons.collections.CollectionUtils; import org.apache.commons.lang3.ArrayUtils; import org.apache.sysml.api.jmlc.Connection; @@ -74,7 +75,7 @@ public class TfMetaUtils * @return * @throws DMLRuntimeException */ - public static boolean containsOmitSpec(String spec, List<String> colnames) throws DMLRuntimeException { + public static boolean containsOmitSpec(String spec, String[] colnames) throws DMLRuntimeException { return (TfMetaUtils.parseJsonIDList(spec, colnames, TfUtils.TXMETHOD_OMIT).length > 0); } @@ -86,7 +87,7 @@ public class TfMetaUtils * @return * @throws DMLRuntimeException */ - public static int[] parseJsonIDList(String spec, List<String> colnames, String group) + public static int[] parseJsonIDList(String spec, String[] colnames, String group) throws DMLRuntimeException { try { @@ -107,7 +108,7 @@ public class TfMetaUtils * @return * @throws JSONException */ - public static int[] parseJsonIDList(JSONObject spec, List<String> colnames, String group) + public static int[] parseJsonIDList(JSONObject spec, String[] colnames, String group) throws JSONException { int[] colList = new int[0]; @@ -127,7 +128,7 @@ public class TfMetaUtils colList = new int[attrs.size()]; for(int i=0; i < colList.length; i++) { colList[i] = ids ? UtilFunctions.toInt(attrs.get(i)) : - (colnames.indexOf(attrs.get(i)) + 1); + (ArraysUtil.indexOf(colnames, attrs.get(i)) + 1); if( colList[i] <= 0 ) { throw new RuntimeException("Specified column '" + attrs.get(i)+"' does not exist."); @@ -148,7 +149,7 @@ public class TfMetaUtils * @return * @throws JSONException */ - public static int[] parseJsonObjectIDList(JSONObject spec, List<String> colnames, String group) + public static int[] parseJsonObjectIDList(JSONObject spec, String[] colnames, String group) throws JSONException { int[] colList = new int[0]; @@ -161,7 +162,7 @@ public class TfMetaUtils for(int j=0; j<colspecs.size(); j++) { JSONObject colspec = (JSONObject) colspecs.get(j); colList[j] = ids ? colspec.getInt("id") : - (colnames.indexOf(colspec.get("name")) + 1); + (ArrayUtils.indexOf(colnames, colspec.get("name")) + 1); if( colList[j] <= 0 ) { throw new RuntimeException("Specified column '" + colspec.get(ids?"id":"name")+"' does not exist."); @@ -190,15 +191,15 @@ public class TfMetaUtils { //read column names String colnamesStr = MapReduceTool.readStringFromHDFSFile(metapath+File.separator+TfUtils.TXMTD_COLNAMES); - List<String> colnames = Arrays.asList(IOUtilFunctions.split(colnamesStr.trim(), colDelim)); + String[] colnames = IOUtilFunctions.split(colnamesStr.trim(), colDelim); //read meta data (currently supported: recode, dummycode, bin, omit, impute) //note: recode/binning and impute might be applied on the same column HashMap<String,String> meta = new HashMap<String,String>(); HashMap<String,String> mvmeta = new HashMap<String,String>(); int rows = 0; - for( int j=0; j<colnames.size(); j++ ) { - String colName = colnames.get(j); + for( int j=0; j<colnames.length; j++ ) { + String colName = colnames[j]; //read recode maps for recoded or dummycoded columns String name = metapath+File.separator+"Recode"+File.separator+colName; if( MapReduceTool.existsFileOnHDFS(name+TfUtils.TXMTD_RCD_MAP_SUFFIX) ) { @@ -244,15 +245,15 @@ public class TfMetaUtils { //read column names String colnamesStr = IOUtilFunctions.toString(Connection.class.getResourceAsStream(metapath+"/"+TfUtils.TXMTD_COLNAMES)); - List<String> colnames = Arrays.asList(IOUtilFunctions.split(colnamesStr.trim(), colDelim)); + String[] colnames = IOUtilFunctions.split(colnamesStr.trim(), colDelim); //read meta data (currently supported: recode, dummycode, bin, omit) //note: recode/binning and impute might be applied on the same column HashMap<String,String> meta = new HashMap<String,String>(); HashMap<String,String> mvmeta = new HashMap<String,String>(); int rows = 0; - for( int j=0; j<colnames.size(); j++ ) { - String colName = colnames.get(j); + for( int j=0; j<colnames.length; j++ ) { + String colName = colnames[j]; //read recode maps for recoded or dummycoded columns String name = metapath+"/"+"Recode"+"/"+colName; String map = IOUtilFunctions.toString(Connection.class.getResourceAsStream(name+TfUtils.TXMTD_RCD_MAP_SUFFIX)); @@ -294,18 +295,18 @@ public class TfMetaUtils * @return * @throws IOException */ - private static FrameBlock convertToTransformMetaDataFrame(int rows, List<String> colnames, List<Integer> rcIDs, List<Integer> binIDs, + private static FrameBlock convertToTransformMetaDataFrame(int rows, String[] colnames, List<Integer> rcIDs, List<Integer> binIDs, HashMap<String,String> meta, HashMap<String,String> mvmeta) throws IOException { //create frame block w/ pure string schema - List<ValueType> schema = Collections.nCopies(colnames.size(), ValueType.STRING); + ValueType[] schema = UtilFunctions.nCopies(colnames.length, ValueType.STRING); FrameBlock ret = new FrameBlock(schema, colnames); ret.ensureAllocatedColumns(rows); //encode recode maps (recoding/dummycoding) into frame for( Integer colID : rcIDs ) { - String name = colnames.get(colID-1); + String name = colnames[colID-1]; String map = meta.get(name); if( map == null ) throw new IOException("Recode map for column '"+name+"' (id="+colID+") not existing."); @@ -324,7 +325,7 @@ public class TfMetaUtils //encode bin maps (binning) into frame for( Integer colID : binIDs ) { - String name = colnames.get(colID-1); + String name = colnames[colID-1]; String map = meta.get(name); if( map == null ) throw new IOException("Binning map for column '"+name+"' (id="+colID+") not existing."); @@ -343,7 +344,7 @@ public class TfMetaUtils //encode impute meta data into frame for( Entry<String, String> e : mvmeta.entrySet() ) { - int colID = colnames.indexOf(e.getKey()) + 1; + int colID = ArrayUtils.indexOf(colnames, e.getKey()) + 1; String mvVal = e.getValue().split(TfUtils.TXMTD_SEP)[1]; ret.getColumnMetadata(colID-1).setMvValue(mvVal); } @@ -361,7 +362,7 @@ public class TfMetaUtils * @throws IOException */ @SuppressWarnings("unchecked") - private static List<Integer> parseRecodeColIDs(String spec, List<String> colnames) + private static List<Integer> parseRecodeColIDs(String spec, String[] colnames) throws IOException { if( spec == null ) @@ -391,7 +392,7 @@ public class TfMetaUtils * @return * @throws IOException */ - public static List<Integer> parseBinningColIDs(String spec, List<String> colnames) + public static List<Integer> parseBinningColIDs(String spec, String[] colnames) throws IOException { try { @@ -409,7 +410,7 @@ public class TfMetaUtils * @return * @throws IOException */ - public static List<Integer> parseBinningColIDs(JSONObject jSpec, List<String> colnames) + public static List<Integer> parseBinningColIDs(JSONObject jSpec, String[] colnames) throws IOException { try { http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/util/DataConverter.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java index 9bb27d9..381ad87 100644 --- a/src/main/java/org/apache/sysml/runtime/util/DataConverter.java +++ b/src/main/java/org/apache/sysml/runtime/util/DataConverter.java @@ -23,7 +23,6 @@ import java.io.IOException; import java.text.DecimalFormat; import java.util.ArrayList; import java.util.Arrays; -import java.util.Collections; import java.util.HashMap; import java.util.Iterator; import java.util.List; @@ -629,10 +628,10 @@ public class DataConverter MatrixBlock mb = new MatrixBlock(m, n, false); mb.allocateDenseBlock(); - List<ValueType> schema = frame.getSchema(); - int dFreq = Collections.frequency(schema, ValueType.DOUBLE); + ValueType[] schema = frame.getSchema(); + int dFreq = UtilFunctions.frequency(schema, ValueType.DOUBLE); - if( dFreq == schema.size() ) { + if( dFreq == schema.length ) { // special case double schema (without cell-object creation, // cache-friendly row-column copy) double[][] a = new double[n][]; @@ -654,7 +653,7 @@ public class DataConverter for( int i=0; i<frame.getNumRows(); i++ ) for( int j=0; j<frame.getNumColumns(); j++ ) { mb.appendValue(i, j, UtilFunctions.objectToDouble( - schema.get(j), frame.get(i, j))); + schema[j], frame.get(i, j))); } } @@ -699,7 +698,7 @@ public class DataConverter return new FrameBlock(); //create schema and frame block - List<ValueType> schema = Collections.nCopies(data[0].length, ValueType.STRING); + ValueType[] schema = UtilFunctions.nCopies(data[0].length, ValueType.STRING); return convertToFrameBlock(data, schema); } @@ -709,7 +708,7 @@ public class DataConverter * @param schema * @return */ - public static FrameBlock convertToFrameBlock(String[][] data, List<ValueType> schema) { + public static FrameBlock convertToFrameBlock(String[][] data, ValueType[] schema) { //check for empty frame block if( data == null || data.length==0 ) return new FrameBlock(); @@ -725,7 +724,7 @@ public class DataConverter * @param colnames * @return */ - public static FrameBlock convertToFrameBlock(String[][] data, List<ValueType> schema, List<String> colnames) { + public static FrameBlock convertToFrameBlock(String[][] data, ValueType[] schema, String[] colnames) { //check for empty frame block if( data == null || data.length==0 ) return new FrameBlock(); @@ -753,7 +752,7 @@ public class DataConverter */ public static FrameBlock convertToFrameBlock(MatrixBlock mb, ValueType vt) { //create schema and frame block - List<ValueType> schema = Collections.nCopies(mb.getNumColumns(), vt); + ValueType[] schema = UtilFunctions.nCopies(mb.getNumColumns(), vt); return convertToFrameBlock(mb, schema); } @@ -763,7 +762,7 @@ public class DataConverter * @param schema * @return */ - public static FrameBlock convertToFrameBlock(MatrixBlock mb, List<ValueType> schema) + public static FrameBlock convertToFrameBlock(MatrixBlock mb, ValueType[] schema) { FrameBlock frame = new FrameBlock(schema); Object[] row = new Object[mb.getNumColumns()]; @@ -780,7 +779,7 @@ public class DataConverter double[] aval = sblock.values(i); for( int j=apos; j<apos+alen; j++ ) { row[aix[j]] = UtilFunctions.doubleToObject( - schema.get(aix[j]), aval[j]); + schema[aix[j]], aval[j]); } } frame.appendRow(row); @@ -788,9 +787,9 @@ public class DataConverter } else //DENSE { - int dFreq = Collections.frequency(schema, ValueType.DOUBLE); + int dFreq = UtilFunctions.frequency(schema, ValueType.DOUBLE); - if( dFreq == schema.size() ) { + if( dFreq == schema.length ) { // special case double schema (without cell-object creation, // col pre-allocation, and cache-friendly row-column copy) int m = mb.getNumRows(); @@ -816,7 +815,7 @@ public class DataConverter for( int i=0; i<mb.getNumRows(); i++ ) { for( int j=0; j<mb.getNumColumns(); j++ ) { row[j] = UtilFunctions.doubleToObject( - schema.get(j), mb.quickGetValue(i, j)); + schema[j], mb.quickGetValue(i, j)); } frame.appendRow(row); } @@ -1052,7 +1051,7 @@ public class DataConverter //print column names sb.append("#"); sb.append(separator); for( int j=0; j<colLength; j++ ) { - sb.append(fb.getColumnNames().get(j)); + sb.append(fb.getColumnNames()[j]); if( j != colLength-1 ) sb.append(separator); } @@ -1061,7 +1060,7 @@ public class DataConverter //print schema sb.append("#"); sb.append(separator); for( int j=0; j<colLength; j++ ) { - sb.append(fb.getSchema().get(j)); + sb.append(fb.getSchema()[j]); if( j != colLength-1 ) sb.append(separator); } @@ -1078,7 +1077,7 @@ public class DataConverter Object[] row = iter.next(); for( int j=0; j<colLength; j++ ) { if( row[j]!=null ) { - if( fb.getSchema().get(j) == ValueType.DOUBLE ) + if( fb.getSchema()[j] == ValueType.DOUBLE ) sb.append(df.format(row[j])); else sb.append(row[j]); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java index c99b79b..de6b8e8 100644 --- a/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java +++ b/src/main/java/org/apache/sysml/runtime/util/MapReduceTool.java @@ -25,7 +25,6 @@ import java.io.FileNotFoundException; import java.io.IOException; import java.io.InputStreamReader; import java.io.OutputStreamWriter; -import java.util.List; import org.apache.commons.logging.Log; import org.apache.commons.logging.LogFactory; @@ -432,7 +431,7 @@ public class MapReduceTool writeMetaDataFile(mtdfile, vt, null, DataType.MATRIX, mc, outinfo); } - public static void writeMetaDataFile(String mtdfile, ValueType vt, List<ValueType> schema, DataType dt, MatrixCharacteristics mc, OutputInfo outinfo) + public static void writeMetaDataFile(String mtdfile, ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc, OutputInfo outinfo) throws IOException { writeMetaDataFile(mtdfile, vt, schema, dt, mc, outinfo, null); } @@ -442,7 +441,7 @@ public class MapReduceTool writeMetaDataFile(mtdfile, vt, null, DataType.MATRIX, mc, outinfo, formatProperties); } - public static void writeMetaDataFile(String mtdfile, ValueType vt, List<ValueType> schema, DataType dt, MatrixCharacteristics mc, + public static void writeMetaDataFile(String mtdfile, ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc, OutputInfo outinfo, FileFormatProperties formatProperties) throws IOException { @@ -490,7 +489,7 @@ public class MapReduceTool * @throws JSONException * @throws DMLRuntimeException */ - public static String metaDataToString(String mtdfile, ValueType vt, List<ValueType> schema, DataType dt, MatrixCharacteristics mc, + public static String metaDataToString(String mtdfile, ValueType vt, ValueType[] schema, DataType dt, MatrixCharacteristics mc, OutputInfo outinfo, FileFormatProperties formatProperties) throws JSONException, DMLRuntimeException { OrderedJSONObject mtd = new OrderedJSONObject(); // maintain order in output file @@ -502,11 +501,11 @@ public class MapReduceTool } else { StringBuffer schemaSB = new StringBuffer(); - for(int i=0; i < schema.size(); i++) { - if( schema.get(i) == ValueType.UNKNOWN ) + for(int i=0; i < schema.length; i++) { + if( schema[i] == ValueType.UNKNOWN ) schemaSB.append("*"); else - schemaSB.append(schema.get(i).toString()); + schemaSB.append(schema[i].toString()); schemaSB.append(DataExpression.DEFAULT_DELIM_DELIMITER); } mtd.put(DataExpression.SCHEMAPARAM, schemaSB.toString()); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java ---------------------------------------------------------------------- diff --git a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java index e5a792a..89472e1 100644 --- a/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java +++ b/src/main/java/org/apache/sysml/runtime/util/UtilFunctions.java @@ -23,6 +23,7 @@ import java.util.ArrayList; import java.util.Arrays; import java.util.List; +import org.apache.commons.lang.ArrayUtils; import org.apache.sysml.parser.Expression.DataType; import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.instructions.InstructionUtils; @@ -604,25 +605,6 @@ public class UtilFunctions } /** - * Returns the subset of the schema - * - * @param srcSchema - * @param lStart - * @param lEnd - * - * @return - */ - public static List<ValueType> getSubSchema(List<ValueType> srcSchema, long lStart, long lEnd) - { - ValueType [] schema = new ValueType[(int) (lEnd-lStart+1)]; - for(int i = 0; i < schema.length; i++) - schema[i] = srcSchema.get((int) (lStart+i)); - - return Arrays.asList(schema); - } - - - /** * This function will return datatype, if its Matrix or Frame * * @param str @@ -660,4 +642,39 @@ public class UtilFunctions return (!sobj.equals("0") && !sobj.equals("0.0")); } } + + /** + * + * @param n + * @param vt + * @return + */ + public static ValueType[] nCopies(int n, ValueType vt) { + ValueType[] ret = new ValueType[n]; + Arrays.fill(ret, vt); + return ret; + } + + /** + * + * @param schema + * @param vt + * @return + */ + public static int frequency(ValueType[] schema, ValueType vt) { + int count = 0; + for( ValueType tmp : schema ) + count += tmp.equals(vt) ? 1 : 0; + return count; + } + + /** + * + * @param schema1 + * @param schema2 + * @return + */ + public static ValueType[] copyOf(ValueType[] schema1, ValueType[] schema2) { + return (ValueType[]) ArrayUtils.addAll(schema1, schema2); + } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java index 4273113..e7bb720 100644 --- a/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java +++ b/src/test/java/org/apache/sysml/test/integration/AutomatedTestBase.java @@ -31,7 +31,6 @@ import java.lang.reflect.Field; import java.util.ArrayList; import java.util.Arrays; import java.util.HashMap; -import java.util.List; import org.apache.sysml.lops.Lop; import org.apache.commons.io.FileUtils; @@ -1700,7 +1699,7 @@ public abstract class AutomatedTestBase * @throws IOException * @throws DMLRuntimeException */ - protected double[][] writeInputFrame(String name, double[][] data, boolean bIncludeR, List<ValueType> schema, OutputInfo oi) + protected double[][] writeInputFrame(String name, double[][] data, boolean bIncludeR, ValueType[] schema, OutputInfo oi) throws DMLRuntimeException, IOException { String completePath = baseDirectory + INPUT_DIR + name; @@ -1725,14 +1724,14 @@ public abstract class AutomatedTestBase return data; } - protected double[][] writeInputFrameWithMTD(String name, double[][] data, boolean bIncludeR, List<ValueType> schema, OutputInfo oi) + protected double[][] writeInputFrameWithMTD(String name, double[][] data, boolean bIncludeR, ValueType[] schema, OutputInfo oi) throws DMLRuntimeException, IOException { MatrixCharacteristics mc = new MatrixCharacteristics(data.length, data[0].length, OptimizerUtils.DEFAULT_BLOCKSIZE, data[0].length, -1); return writeInputFrameWithMTD(name, data, bIncludeR, mc, schema, oi); } - protected double[][] writeInputFrameWithMTD(String name, double[][] data, boolean bIncludeR, MatrixCharacteristics mc, List<ValueType> schema, OutputInfo oi) + protected double[][] writeInputFrameWithMTD(String name, double[][] data, boolean bIncludeR, MatrixCharacteristics mc, ValueType[] schema, OutputInfo oi) throws DMLRuntimeException, IOException { writeInputFrame(name, data, bIncludeR, schema, oi); @@ -1766,7 +1765,7 @@ public abstract class AutomatedTestBase * @throws IOException * @throws DMLRuntimeException */ - protected double[][] writeInputFrame(String name, double[][] data, List<ValueType> schema, OutputInfo oi) + protected double[][] writeInputFrame(String name, double[][] data, ValueType[] schema, OutputInfo oi) throws DMLRuntimeException, IOException { return writeInputFrame(name, data, false, schema, oi); http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java index 0d3b932..ccb91f4 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendDistTest.java @@ -20,7 +20,6 @@ package org.apache.sysml.test.integration.functions.frame; import java.util.ArrayList; -import java.util.Arrays; import java.util.Collections; import java.util.List; @@ -158,11 +157,11 @@ public class FrameAppendDistTest extends AutomatedTestBase inputDir() + " " + expectedDir() + " " + (rbind? "rbind": "cbind"); //initialize the frame data. - List<ValueType> lschemaA = Arrays.asList(genMixSchema(cols1)); + ValueType[] lschemaA = genMixSchema(cols1); double[][] A = getRandomMatrix(rows1, cols1, min, max, sparsity, 1111 /*\\System.currentTimeMillis()*/); writeInputFrameWithMTD("A", A, true, lschemaA, OutputInfo.BinaryBlockOutputInfo); - List<ValueType> lschemaB = Arrays.asList(genMixSchema(cols2)); + ValueType[] lschemaB = genMixSchema(cols2); double[][] B = getRandomMatrix(rows2, cols2, min, max, sparsity, 2345 /*\\System.currentTimeMillis()*/); writeInputFrameWithMTD("B", B, true, lschemaB, OutputInfo.BinaryBlockOutputInfo); @@ -171,15 +170,14 @@ public class FrameAppendDistTest extends AutomatedTestBase runTest(true, exceptionExpected, null, expectedNumberOfJobs); runRScript(true); - List<ValueType> lschemaAB = new ArrayList<ValueType>(lschemaA); - lschemaAB.addAll(lschemaB); + ValueType[] lschemaAB = UtilFunctions.copyOf(lschemaA, lschemaB); for(String file: config.getOutputFiles()) { FrameBlock frameBlock = readDMLFrameFromHDFS(file, InputInfo.BinaryBlockInputInfo); MatrixCharacteristics md = new MatrixCharacteristics(frameBlock.getNumRows(), frameBlock.getNumColumns(), -1, -1); FrameBlock frameRBlock = readRFrameFromHDFS(file+".csv", InputInfo.CSVInputInfo, md); - verifyFrameData(frameBlock, frameRBlock, (ValueType[]) lschemaAB.toArray(new ValueType[0])); + verifyFrameData(frameBlock, frameRBlock, (ValueType[]) lschemaAB); System.out.println("File processed is " + file); } } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java index 555cf55..46d07b5 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameAppendTest.java @@ -19,9 +19,6 @@ package org.apache.sysml.test.integration.functions.frame; -import java.util.Arrays; -import java.util.List; - import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.instructions.cp.AppendCPInstruction.AppendType; import org.apache.sysml.runtime.matrix.data.FrameBlock; @@ -90,24 +87,22 @@ public class FrameAppendTest extends AutomatedTestBase double[][] B = getRandomMatrix(rows, schema2.length, -10, 10, 0.9, 129); //init data frame 1 - List<ValueType> lschema1 = Arrays.asList(schema1); - FrameBlock frame1 = new FrameBlock(lschema1); - Object[] row1 = new Object[lschema1.size()]; + FrameBlock frame1 = new FrameBlock(schema1); + Object[] row1 = new Object[schema1.length]; for( int i=0; i<rows; i++ ) { - for( int j=0; j<lschema1.size(); j++ ) - A[i][j] = UtilFunctions.objectToDouble(lschema1.get(j), - row1[j] = UtilFunctions.doubleToObject(lschema1.get(j), A[i][j])); + for( int j=0; j<schema1.length; j++ ) + A[i][j] = UtilFunctions.objectToDouble(schema1[j], + row1[j] = UtilFunctions.doubleToObject(schema1[j], A[i][j])); frame1.appendRow(row1); } //init data frame 2 - List<ValueType> lschema2 = Arrays.asList(schema2); - FrameBlock frame2 = new FrameBlock(lschema2); - Object[] row2 = new Object[lschema2.size()]; + FrameBlock frame2 = new FrameBlock(schema2); + Object[] row2 = new Object[schema2.length]; for( int i=0; i<rows; i++ ) { - for( int j=0; j<lschema2.size(); j++ ) - B[i][j] = UtilFunctions.objectToDouble(lschema2.get(j), - row2[j] = UtilFunctions.doubleToObject(lschema2.get(j), B[i][j])); + for( int j=0; j<schema2.length; j++ ) + B[i][j] = UtilFunctions.objectToDouble(schema2[j], + row2[j] = UtilFunctions.doubleToObject(schema2[j], B[i][j])); frame2.appendRow(row2); } @@ -125,10 +120,10 @@ public class FrameAppendTest extends AutomatedTestBase Assert.fail("Wrong number of rows: "+frame3.getNumRows()+", expected: "+mbC.getNumRows()); //check correct values - List<ValueType> lschema = frame3.getSchema(); + ValueType[] lschema = frame3.getSchema(); for( int i=0; i<rows; i++ ) - for( int j=0; j<lschema.size(); j++ ) { - double tmp = UtilFunctions.objectToDouble(lschema.get(j), frame3.get(i, j)); + for( int j=0; j<lschema.length; j++ ) { + double tmp = UtilFunctions.objectToDouble(lschema[j], frame3.get(i, j)); if( tmp != mbC.quickGetValue(i, j) ) Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+mbC.quickGetValue(i, j)); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java index 5fe14dd..c7a5557 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCastingTest.java @@ -19,9 +19,6 @@ package org.apache.sysml.test.integration.functions.frame; -import java.util.Arrays; -import java.util.List; - import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.matrix.data.FrameBlock; import org.apache.sysml.runtime.matrix.data.MatrixBlock; @@ -102,12 +99,11 @@ public class FrameCastingTest extends AutomatedTestBase if( ctype == CastType.F2M ) { //construct input schema - List<ValueType> lschema1 = Arrays.asList(schema); - FrameBlock frame1 = new FrameBlock(lschema1); - Object[] row1 = new Object[lschema1.size()]; + FrameBlock frame1 = new FrameBlock(schema); + Object[] row1 = new Object[schema.length]; for( int i=0; i<rows; i++ ) { - for( int j=0; j<lschema1.size(); j++ ) - row1[j] = UtilFunctions.doubleToObject(lschema1.get(j), A[i][j]); + for( int j=0; j<schema.length; j++ ) + row1[j] = UtilFunctions.doubleToObject(schema[j], A[i][j]); frame1.appendRow(row1); } @@ -122,7 +118,7 @@ public class FrameCastingTest extends AutomatedTestBase else if( ctype == CastType.M2F_S ) { MatrixBlock mb = DataConverter.convertToMatrixBlock(A); - frame = DataConverter.convertToFrameBlock(mb, Arrays.asList(schema)); + frame = DataConverter.convertToFrameBlock(mb, schema); } //check basic meta data @@ -130,10 +126,10 @@ public class FrameCastingTest extends AutomatedTestBase Assert.fail("Wrong number of rows: "+frame.getNumRows()+", expected: "+rows); //check correct values - List<ValueType> lschema = frame.getSchema(); + ValueType[] lschema = frame.getSchema(); for( int i=0; i<rows; i++ ) - for( int j=0; j<lschema.size(); j++ ) { - double tmp = UtilFunctions.objectToDouble(lschema.get(j), frame.get(i, j)); + for( int j=0; j<lschema.length; j++ ) { + double tmp = UtilFunctions.objectToDouble(lschema[j], frame.get(i, j)); if( tmp != A[i][j] ) Assert.fail("Wrong get value for cell ("+i+","+j+"): "+tmp+", expected: "+A[i][j]); } http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java index e8c3c51..511f11c 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameConverterTest.java @@ -285,9 +285,8 @@ public class FrameConverterTest extends AutomatedTestBase try { //initialize the frame data. - List<ValueType> lschema = Arrays.asList(schema); - FrameBlock frame1 = new FrameBlock(lschema); - initFrameData(frame1, A, lschema); + FrameBlock frame1 = new FrameBlock(schema); + initFrameData(frame1, A, schema); //write frame data to hdfs FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo); @@ -346,9 +345,8 @@ public class FrameConverterTest extends AutomatedTestBase } else { //initialize the frame data. - List<ValueType> lschema = Arrays.asList(schema); - frame1 = new FrameBlock(lschema); - initFrameData(frame1, A, lschema); + frame1 = new FrameBlock(schema); + initFrameData(frame1, A, schema); //write frame data to hdfs FrameWriter writer = FrameWriterFactory.createFrameWriter(oinfo); @@ -393,12 +391,12 @@ public class FrameConverterTest extends AutomatedTestBase * @param data * @param lschema */ - private void initFrameData(FrameBlock frame, double[][] data, List<ValueType> lschema) { - Object[] row1 = new Object[lschema.size()]; + private void initFrameData(FrameBlock frame, double[][] data, ValueType[] lschema) { + Object[] row1 = new Object[lschema.length]; for( int i=0; i<rows; i++ ) { - for( int j=0; j<lschema.size(); j++ ) - data[i][j] = UtilFunctions.objectToDouble(lschema.get(j), - row1[j] = UtilFunctions.doubleToObject(lschema.get(j), data[i][j])); + for( int j=0; j<lschema.length; j++ ) + data[i][j] = UtilFunctions.objectToDouble(lschema[j], + row1[j] = UtilFunctions.doubleToObject(lschema[j], data[i][j])); frame.appendRow(row1); } } @@ -428,10 +426,10 @@ public class FrameConverterTest extends AutomatedTestBase private void verifyFrameMatrixData(FrameBlock frame, MatrixBlock matrix) { for ( int i=0; i<frame.getNumRows(); i++ ) for( int j=0; j<frame.getNumColumns(); j++ ) { - Object val1 = UtilFunctions.doubleToObject(frame.getSchema().get(j), - UtilFunctions.objectToDouble(frame.getSchema().get(j), frame.get(i, j))); - Object val2 = UtilFunctions.doubleToObject(frame.getSchema().get(j), matrix.getValue(i, j)); - if(( UtilFunctions.compareTo(frame.getSchema().get(j), val1, val2)) != 0) + Object val1 = UtilFunctions.doubleToObject(frame.getSchema()[j], + UtilFunctions.objectToDouble(frame.getSchema()[j], frame.get(i, j))); + Object val2 = UtilFunctions.doubleToObject(frame.getSchema()[j], matrix.getValue(i, j)); + if(( UtilFunctions.compareTo(frame.getSchema()[j], val1, val2)) != 0) Assert.fail("Frame value for cell ("+ i + "," + j + ") is " + val1 + ", is not same as matrix value " + val2); } @@ -455,6 +453,7 @@ public class FrameConverterTest extends AutomatedTestBase { SparkExecutionContext sec = (SparkExecutionContext) ExecutionContextFactory.createContext(); JavaSparkContext sc = sec.getSparkContext(); + ValueType[] lschema = schema.toArray(new ValueType[0]); MapReduceTool.deleteFileIfExistOnHDFS(fnameOut); @@ -483,7 +482,7 @@ public class FrameConverterTest extends AutomatedTestBase OutputInfo oinfo = OutputInfo.BinaryBlockOutputInfo; JavaPairRDD<LongWritable,Text> rddIn = sc.hadoopFile(fnameIn, iinfo.inputFormatClass, iinfo.inputKeyClass, iinfo.inputValueClass); JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils - .textCellToBinaryBlock(sc, rddIn, mc, schema) + .textCellToBinaryBlock(sc, rddIn, mc, lschema) .mapToPair(new LongFrameToLongWritableFrameFunction()); rddOut.saveAsHadoopFile(fnameOut, LongWritable.class, FrameBlock.class, oinfo.outputFormatClass); break; @@ -519,8 +518,8 @@ public class FrameConverterTest extends AutomatedTestBase //Create DataFrame SQLContext sqlContext = new SQLContext(sc); - StructType dfSchema = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(schema, false); - JavaRDD<Row> rowRDD = FrameRDDConverterUtils.csvToRowRDD(sc, fnameIn, separator, schema); + StructType dfSchema = FrameRDDConverterUtils.convertFrameSchemaToDFSchema(lschema, false); + JavaRDD<Row> rowRDD = FrameRDDConverterUtils.csvToRowRDD(sc, fnameIn, separator, lschema); DataFrame df = sqlContext.createDataFrame(rowRDD, dfSchema); JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils @@ -535,7 +534,7 @@ public class FrameConverterTest extends AutomatedTestBase JavaPairRDD<Long, FrameBlock> rddIn = sc .hadoopFile(fnameIn, iinfo.inputFormatClass, LongWritable.class, FrameBlock.class) .mapToPair(new LongWritableFrameToLongFrameFunction()); - DataFrame df = FrameRDDConverterUtils.binaryBlockToDataFrame(new SQLContext(sc), rddIn, mc, schema); + DataFrame df = FrameRDDConverterUtils.binaryBlockToDataFrame(new SQLContext(sc), rddIn, mc, lschema); //Convert back DataFrame to binary block for comparison using original binary to converted DF and back to binary JavaPairRDD<LongWritable, FrameBlock> rddOut = FrameRDDConverterUtils http://git-wip-us.apache.org/repos/asf/incubator-systemml/blob/eb988781/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java ---------------------------------------------------------------------- diff --git a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java index e713a86..84bd36d 100644 --- a/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java +++ b/src/test/java/org/apache/sysml/test/integration/functions/frame/FrameCopyTest.java @@ -19,9 +19,6 @@ package org.apache.sysml.test.integration.functions.frame; -import java.util.Arrays; -import java.util.List; - import org.apache.sysml.parser.Expression.ValueType; import org.apache.sysml.runtime.instructions.cp.AppendCPInstruction.AppendType; import org.apache.sysml.runtime.matrix.data.FrameBlock; @@ -89,14 +86,12 @@ public class FrameCopyTest extends AutomatedTestBase //Initialize the frame data. //init data frame 1 - List<ValueType> lschema1 = Arrays.asList(schema1); - FrameBlock frame1 = new FrameBlock(lschema1); - initFrameData(frame1, A, lschema1); + FrameBlock frame1 = new FrameBlock(schema1); + initFrameData(frame1, A, schema1); //init data frame 2 - List<ValueType> lschema2 = Arrays.asList(schema2); - FrameBlock frame2 = new FrameBlock(lschema2); - initFrameData(frame2, B, lschema2); + FrameBlock frame2 = new FrameBlock(schema2); + initFrameData(frame2, B, schema2); //copy from one frame to another. FrameBlock frame1Backup = new FrameBlock(frame1.getSchema(), frame1.getColumnNames()); @@ -125,44 +120,44 @@ public class FrameCopyTest extends AutomatedTestBase } } - void initFrameData(FrameBlock frame, double[][] data, List<ValueType> lschema) + void initFrameData(FrameBlock frame, double[][] data, ValueType[] lschema) { - Object[] row1 = new Object[lschema.size()]; + Object[] row1 = new Object[lschema.length]; for( int i=0; i<rows; i++ ) { - for( int j=0; j<lschema.size(); j++ ) - data[i][j] = UtilFunctions.objectToDouble(lschema.get(j), - row1[j] = UtilFunctions.doubleToObject(lschema.get(j), data[i][j])); + for( int j=0; j<lschema.length; j++ ) + data[i][j] = UtilFunctions.objectToDouble(lschema[j], + row1[j] = UtilFunctions.doubleToObject(lschema[j], data[i][j])); frame.appendRow(row1); } } void updateFrameWithDummyData(FrameBlock frame, int updateRow) { - List<ValueType>lschema = frame.getSchema(); - for( int j=0; j<lschema.size(); j++ ) { - switch( lschema.get(j) ) { + ValueType[] lschema = frame.getSchema(); + for( int j=0; j<lschema.length; j++ ) { + switch( lschema[j] ) { case STRING: frame.set(updateRow, j, "String:"+ frame.get(updateRow, j)); break; case BOOLEAN: frame.set(updateRow, j, ((Boolean)frame.get(updateRow, j))?(new Boolean(false)):(new Boolean(true))); break; case INT: frame.set(updateRow, j, (Long)frame.get(updateRow, j) * 2 + 5); break; case DOUBLE: frame.set(updateRow, j, (Double)frame.get(updateRow, j) * 2 + 7); break; - default: throw new RuntimeException("Unsupported value type: "+lschema.get(j)); + default: throw new RuntimeException("Unsupported value type: "+lschema[j]); } } } void verifyFrameData(FrameBlock frame1, FrameBlock frame2, int updateRow, boolean bEqual) { - List<ValueType>lschema = frame1.getSchema(); - for( int j=0; j<lschema.size(); j++ ) { + ValueType[ ]lschema = frame1.getSchema(); + for( int j=0; j<lschema.length; j++ ) { if(!bEqual) { - if( UtilFunctions.compareTo(lschema.get(j), frame1.get(updateRow, j), frame2.get(updateRow, j)) == 0) + if( UtilFunctions.compareTo(lschema[j], frame1.get(updateRow, j), frame2.get(updateRow, j)) == 0) Assert.fail("Updated value for cell ("+ updateRow + "," + j + ") is " + frame1.get(updateRow, j) + ", same as original value "+frame2.get(updateRow, j)); } else { - if( UtilFunctions.compareTo(lschema.get(j), frame1.get(updateRow, j), frame2.get(updateRow, j)) != 0) + if( UtilFunctions.compareTo(lschema[j], frame1.get(updateRow, j), frame2.get(updateRow, j)) != 0) Assert.fail("Updated value for cell ("+ updateRow + "," + j + ") is " + frame1.get(updateRow, j) + ", not same as original value "+frame2.get(updateRow, j)); } @@ -171,10 +166,10 @@ public class FrameCopyTest extends AutomatedTestBase void verifyFrameData(FrameBlock frame1, FrameBlock frame2) { - List<ValueType> lschema = frame1.getSchema(); + ValueType[] lschema = frame1.getSchema(); for ( int i=0; i<frame1.getNumRows(); i++ ) - for( int j=0; j<lschema.size(); j++ ) { - if( UtilFunctions.compareTo(lschema.get(j), frame1.get(i, j), frame2.get(i, j)) != 0) + for( int j=0; j<lschema.length; j++ ) { + if( UtilFunctions.compareTo(lschema[j], frame1.get(i, j), frame2.get(i, j)) != 0) Assert.fail("Target value for cell ("+ i + "," + j + ") is " + frame1.get(i, j) + ", is not same as original value " + frame2.get(i, j)); }