phaniarnab commented on code in PR #2275: URL: https://github.com/apache/systemds/pull/2275#discussion_r2182386638
########## src/main/java/org/apache/sysds/runtime/transform/decode/ColumnDecoderDummycode.java: ########## @@ -0,0 +1,154 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.transform.decode; + +import org.apache.sysds.common.Types; +import org.apache.sysds.runtime.frame.data.FrameBlock; +import org.apache.sysds.runtime.frame.data.columns.ColumnMetadata; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.util.UtilFunctions; + +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.ArrayList; +import java.util.Arrays; +import java.util.List; + +public class ColumnDecoderDummycode extends ColumnDecoder { + + private static final long serialVersionUID = 4758831042891032129L; + + private int[] _clPos = null; + private int[] _cuPos = null; + // category index for dedicated single-column decoders (-1 if not used) + private int _category = -1; + + protected ColumnDecoderDummycode(Types.ValueType[] schema, int[] colList) { + super(schema, colList); + } + + @Override + public FrameBlock columnDecode(MatrixBlock in, FrameBlock out) { + out.ensureAllocatedColumns(in.getNumRows()); + 
columnDecode(in, out, 0, in.getNumRows()); + return out; + } + + @Override + public void columnDecode(MatrixBlock in, FrameBlock out, int rl, int ru) { + if(_category >= 0) { + int col = _colList[0] - 1; + Object val = UtilFunctions.doubleToObject(out.getSchema()[col], _category); + for(int i = rl; i < ru; i++) + if(in.get(i, _clPos[0]-1) == 1) + synchronized(out) { out.set(i, col, val); } + } + else { + for( int i=rl; i<ru; i++ ) + for( int j=0; j<_colList.length; j++ ) + for( int k=_clPos[j]; k<_cuPos[j]; k++ ) + if( in.get(i, k-1) != 0 ) { + int col = _colList[j] - 1; + Object val = UtilFunctions.doubleToObject(out.getSchema()[col], k-_clPos[j]+1); + synchronized(out) { out.set(i, col, val); } + } + } Review Comment: A column decoder should work on a single column that is provided. ########## src/main/java/org/apache/sysds/runtime/transform/decode/ColumnDecoderRecode.java: ########## @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.runtime.transform.decode; + +import org.apache.sysds.common.Types.ValueType; +import org.apache.sysds.runtime.frame.data.FrameBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.data.Pair; +import org.apache.sysds.runtime.transform.TfUtils; +import org.apache.sysds.runtime.transform.encode.ColumnEncoderRecode; +import org.apache.sysds.runtime.util.UtilFunctions; + +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.*; Review Comment: Avoid wildcard imports. Import only the required classes. ########## src/main/java/org/apache/sysds/runtime/transform/decode/ColumnDecoderBin.java: ########## @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.runtime.transform.decode; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.common.Types.ValueType; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.frame.data.FrameBlock; +import org.apache.sysds.runtime.frame.data.columns.Array; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.util.UtilFunctions; + +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; + +public class ColumnDecoderBin extends ColumnDecoder { + private static final long serialVersionUID = -3784249774608228805L; + + private int[] _numBins; + private double[][] _binMins = null; + private double[][] _binMaxs = null; + + public ColumnDecoderBin() { + super(null, null); + } + + protected ColumnDecoderBin(ValueType[] schema, int[] binCols) { + super(schema, binCols); + } + + + //@Override + //public FrameBlock columnDecode(MatrixBlock in, FrameBlock out) { +// + // long b1 = System.nanoTime(); + // out.ensureAllocatedColumns(in.getNumRows()); + // for (int i = 0; i < in.getNumRows(); i++) { + // for (int j = 0; j < _colList.length; j++) { + // double val = in.get(i, j); + // if (!Double.isNaN(val)) { + // int key = (int) Math.round(val); + // double bmin = _binMins[j][key - 1]; + // double bmax = _binMaxs[j][key - 1]; + // double oval = bmin + (bmax - bmin) / 2 + (val - key) * (bmax - bmin); + // out.getColumn(_colList[j] - 1).set(i, oval); + // } else { + // out.getColumn(_colList[j] - 1).set(i, val); + // } + // } + // } + // //columnDecode(in, out, 0, in.getNumRows()); + // long b2 = System.nanoTime(); + // System.out.println(this.getClass() + "time: " + (b2 - b1) / 1e6 + " ms"); + // return out; + //} + + @Override + public FrameBlock columnDecode(MatrixBlock in, FrameBlock out) { + long b1 = System.nanoTime(); + out.ensureAllocatedColumns(in.getNumRows()); + + final int outColIndex = _colList[0] - 1; + final double[] 
binMins = _binMins[0]; + final double[] binMaxs = _binMaxs[0]; + final int nRows = in.getNumRows(); + Array<?> a = out.getColumn(0); + for (int i = 0; i < nRows; i++) { + double val = in.get(i, 0); + double decoded; + if (!Double.isNaN(val)) { + int key = (int) Math.round(val); + double bmin = binMins[key - 1]; + double bmax = binMaxs[key - 1]; + decoded = bmin + (bmax - bmin) / 2 + + (val - key) * (bmax - bmin); + a.set(i, decoded); + } else { + a.set(i, val); + } + } + long b2 = System.nanoTime(); + System.out.println(this.getClass() +": "+ (b2 - b1) / 1e6 + " ms"); + return out; + } + + + @Override + public void columnDecode(MatrixBlock in, FrameBlock out, int rl, int ru) { + for (int i = rl; i < ru; i++) { + for (int j = 0; j < _colList.length; j++) { + double val = in.get(i, j); + if (!Double.isNaN(val)) { + int key = (int) Math.round(val); + double bmin = _binMins[j][key - 1]; + double bmax = _binMaxs[j][key - 1]; + double oval = bmin + (bmax - bmin) / 2 + (val - key) * (bmax - bmin); + out.getColumn(_colList[j] - 1).set(i, oval); + } else { + out.getColumn(_colList[j] - 1).set(i, val); + } + } Review Comment: I don't understand why you are iterating all columns in a column decoder. ########## src/main/java/org/apache/sysds/runtime/transform/decode/DecoderRecode.java: ########## @@ -71,12 +71,13 @@ public FrameBlock decode(MatrixBlock in, FrameBlock out) { @Override public void decode(MatrixBlock in, FrameBlock out, int rl, int ru) { - if( _onOut ) { //recode on output (after dummy) + if( _onOut ) { //recode on output (after dummy) for( int i=rl; i<ru; i++ ) { for( int j=0; j<_colList.length; j++ ) { int colID = _colList[j]; double val = UtilFunctions.objectToDouble( out.getSchema()[colID-1], out.get(i, colID-1)); + Review Comment: Remove these empty lines that you added. 
########## src/main/java/org/apache/sysds/runtime/transform/decode/ColumnDecoderBin.java: ########## @@ -0,0 +1,214 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.transform.decode; + +import org.apache.commons.lang3.NotImplementedException; +import org.apache.sysds.common.Types.ValueType; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.frame.data.FrameBlock; +import org.apache.sysds.runtime.frame.data.columns.Array; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.util.UtilFunctions; + +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; + +public class ColumnDecoderBin extends ColumnDecoder { + private static final long serialVersionUID = -3784249774608228805L; + + private int[] _numBins; + private double[][] _binMins = null; + private double[][] _binMaxs = null; + + public ColumnDecoderBin() { + super(null, null); + } + + protected ColumnDecoderBin(ValueType[] schema, int[] binCols) { + super(schema, binCols); + } + + + //@Override + //public FrameBlock columnDecode(MatrixBlock in, FrameBlock out) { +// + // long b1 = System.nanoTime(); + // 
out.ensureAllocatedColumns(in.getNumRows()); + // for (int i = 0; i < in.getNumRows(); i++) { + // for (int j = 0; j < _colList.length; j++) { + // double val = in.get(i, j); + // if (!Double.isNaN(val)) { + // int key = (int) Math.round(val); + // double bmin = _binMins[j][key - 1]; + // double bmax = _binMaxs[j][key - 1]; + // double oval = bmin + (bmax - bmin) / 2 + (val - key) * (bmax - bmin); + // out.getColumn(_colList[j] - 1).set(i, oval); + // } else { + // out.getColumn(_colList[j] - 1).set(i, val); + // } + // } + // } + // //columnDecode(in, out, 0, in.getNumRows()); + // long b2 = System.nanoTime(); + // System.out.println(this.getClass() + "time: " + (b2 - b1) / 1e6 + " ms"); + // return out; + //} + + @Override + public FrameBlock columnDecode(MatrixBlock in, FrameBlock out) { + long b1 = System.nanoTime(); + out.ensureAllocatedColumns(in.getNumRows()); + + final int outColIndex = _colList[0] - 1; Review Comment: Why is outColIndex always _colList[0] - 1? ########## src/main/java/org/apache/sysds/runtime/transform/decode/ColumnDecoderRecode.java: ########## @@ -0,0 +1,195 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.sysds.runtime.transform.decode; + +import org.apache.sysds.common.Types.ValueType; +import org.apache.sysds.runtime.frame.data.FrameBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; +import org.apache.sysds.runtime.matrix.data.Pair; +import org.apache.sysds.runtime.transform.TfUtils; +import org.apache.sysds.runtime.transform.encode.ColumnEncoderRecode; +import org.apache.sysds.runtime.util.UtilFunctions; + +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; +import java.util.*; + +public class ColumnDecoderRecode extends ColumnDecoder { + + private static final long serialVersionUID = -3784249774608228805L; + + private HashMap<Long, Object>[] _rcMaps = null; + private Object[][] _rcMapsDirect = null; + private boolean _onOut = false; + + public ColumnDecoderRecode() { + super(null, null); + } + + protected ColumnDecoderRecode(ValueType[] schema, boolean onOut, int[] rcCols) { + super(schema, rcCols); + _onOut = onOut; + } + + @Override + public FrameBlock columnDecode(MatrixBlock in, FrameBlock out) { + + long r1 = System.nanoTime(); + // TODO + out.ensureAllocatedColumns(in.getNumRows()); + columnDecode(in, out, 0, in.getNumRows()); + long r2 = System.nanoTime(); + System.out.println(this.getClass() + "time: " + (r2 - r1) / 1e6 + " ms"); + return out; + } + + @Override + public void columnDecode(MatrixBlock in, FrameBlock out, int rl, int ru) { + // TODO + if( _onOut ) { //recode on output (after dummy) + for( int i=rl; i<ru; i++ ) { + for( int j=0; j<_colList.length; j++ ) { + int colID = _colList[j]; + double val = UtilFunctions.objectToDouble( + out.getSchema()[colID-1], out.get(i, colID-1)); + long key = UtilFunctions.toLong(val); + out.set(i, colID-1, getRcMapValue(j, key)); + } Review Comment: Why are you iterating all the columns? A column decoder should be called for each column. 
########## src/main/java/org/apache/sysds/runtime/transform/decode/ColumnDecoder.java: ########## @@ -0,0 +1,172 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.sysds.runtime.transform.decode; + +import org.apache.commons.logging.Log; +import org.apache.commons.logging.LogFactory; +import org.apache.sysds.common.Types.ValueType; +import org.apache.sysds.runtime.DMLRuntimeException; +import org.apache.sysds.runtime.frame.data.FrameBlock; +import org.apache.sysds.runtime.matrix.data.MatrixBlock; + + +import java.io.Externalizable; +import java.io.IOException; +import java.io.ObjectInput; +import java.io.ObjectOutput; + +public abstract class ColumnDecoder implements Externalizable { + protected static final Log LOG = LogFactory.getLog(Decoder.class.getName()); + private static final long serialVersionUID = -1732411001366177787L; + + protected ValueType[] _schema; + protected int[] _colList; + protected String[] _colnames = null; + protected ColumnDecoder(ValueType[] schema, int[] colList) { + _schema = schema; + _colList = colList; + } Review Comment: Why a column list? A column decoder should work on a single column. -- This is an automated message from the Apache Git Service. 
To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: dev-unsubscr...@systemds.apache.org For queries about this service, please contact Infrastructure at: us...@infra.apache.org