phaniarnab commented on a change in pull request #1203:
URL: https://github.com/apache/systemds/pull/1203#discussion_r598936602
##########
File path:
src/main/java/org/apache/sysds/runtime/transform/encode/MultiColumnEncoder.java
##########
@@ -0,0 +1,446 @@
+package org.apache.sysds.runtime.transform.encode;
+
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.ArrayList;
+import java.util.HashSet;
+import java.util.List;
+import java.util.function.Consumer;
+import java.util.function.Function;
+import java.util.stream.Collectors;
+
+import org.apache.commons.logging.Log;
+import org.apache.commons.logging.LogFactory;
+import org.apache.sysds.common.Types;
+import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.matrix.data.FrameBlock;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.util.IndexRange;
+
+public class MultiColumnEncoder implements Encoder {
+
Review comment:
Please fix the warnings.
##########
File path:
src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderPassThrough.java
##########
@@ -0,0 +1,91 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.transform.encode;
+
+import org.apache.sysds.common.Types.ValueType;
+import org.apache.sysds.runtime.matrix.data.FrameBlock;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+import org.apache.sysds.runtime.util.UtilFunctions;
+
+/**
+ * Simple composite encoder that applies a list of encoders in specified
order. By implementing the default encoder API
+ * it can be used as a drop-in replacement for any other encoder.
+ *
+ */
Review comment:
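Is this Javadoc comment correct? It describes a composite encoder that applies a list of encoders, but this class is ColumnEncoderPassThrough; the text appears to be copied from ColumnEncoderComposite.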
##########
File path:
src/main/java/org/apache/sysds/runtime/transform/encode/ColumnEncoderComposite.java
##########
@@ -0,0 +1,268 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+
+package org.apache.sysds.runtime.transform.encode;
+
+import java.io.IOException;
+import java.io.ObjectInput;
+import java.io.ObjectOutput;
+import java.util.ArrayList;
+import java.util.List;
+import java.util.Objects;
+
+import org.apache.sysds.runtime.DMLRuntimeException;
+import org.apache.sysds.runtime.matrix.data.FrameBlock;
+import org.apache.sysds.runtime.matrix.data.MatrixBlock;
+
+/**
+ * Simple composite encoder that applies a list of encoders in specified
order. By implementing the default encoder API
+ * it can be used as a drop-in replacement for any other encoder.
+ *
+ */
+// TODO assert each type of encoder can only be present once
+public class ColumnEncoderComposite extends ColumnEncoder {
+ private static final long serialVersionUID = -8473768154646831882L;
+
+ private List<ColumnEncoder> _columnEncoders = null;
+ private FrameBlock _meta = null;
+
+ public ColumnEncoderComposite() {
+ super(-1);
+ }
+
+ public ColumnEncoderComposite(List<ColumnEncoder> columnEncoders,
FrameBlock meta) {
+ super(-1);
+ if(!(columnEncoders.size() > 0 &&
+ columnEncoders.stream().allMatch((encoder ->
encoder._colID == columnEncoders.get(0)._colID))))
+ throw new DMLRuntimeException("Tried to create
Composite Encoder with no encoders or mismatching columIDs");
+ _colID = columnEncoders.get(0)._colID;
+ _meta = meta;
+ _columnEncoders = columnEncoders;
+ }
+
+ public ColumnEncoderComposite(List<ColumnEncoder> columnEncoders) {
+ this(columnEncoders, null);
+ }
+
+ public ColumnEncoderComposite(ColumnEncoder columnEncoder) {
+ super(columnEncoder._colID);
+ _columnEncoders = new ArrayList<>();
+ _columnEncoders.add(columnEncoder);
+ }
+
+ public List<ColumnEncoder> getEncoders() {
+ return _columnEncoders;
+ }
+
+ public <T extends ColumnEncoder> T getEncoder(Class<T> type) {
+ for(ColumnEncoder columnEncoder : _columnEncoders) {
+ if(columnEncoder.getClass().equals(type))
+ return (T) columnEncoder;
Review comment:
Please fix the warnings (e.g., the unchecked cast to T on this line).
##########
File path: src/main/java/org/apache/sysds/runtime/transform/encode/Encoder.java
##########
@@ -1,274 +1,67 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
Review comment:
Please add the copyright back: this change removes the Apache license header from the top of the file.
##########
File path: src/main/java/org/apache/sysds/runtime/transform/encode/Encoder.java
##########
@@ -1,274 +1,67 @@
-/*
- * Licensed to the Apache Software Foundation (ASF) under one
- * or more contributor license agreements. See the NOTICE file
- * distributed with this work for additional information
- * regarding copyright ownership. The ASF licenses this file
- * to you under the Apache License, Version 2.0 (the
- * "License"); you may not use this file except in compliance
- * with the License. You may obtain a copy of the License at
- *
- * http://www.apache.org/licenses/LICENSE-2.0
- *
- * Unless required by applicable law or agreed to in writing,
- * software distributed under the License is distributed on an
- * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
- * KIND, either express or implied. See the License for the
- * specific language governing permissions and limitations
- * under the License.
- */
-
package org.apache.sysds.runtime.transform.encode;
import java.io.Externalizable;
-import java.io.IOException;
-import java.io.ObjectInput;
-import java.io.ObjectOutput;
-import java.util.ArrayList;
-import java.util.Arrays;
-import java.util.HashSet;
-import java.util.List;
-import java.util.Set;
-import org.apache.commons.logging.Log;
-import org.apache.commons.logging.LogFactory;
-import org.apache.sysds.runtime.DMLRuntimeException;
import org.apache.sysds.runtime.matrix.data.FrameBlock;
import org.apache.sysds.runtime.matrix.data.MatrixBlock;
-import org.apache.sysds.runtime.util.IndexRange;
-import org.apache.sysds.runtime.util.UtilFunctions;
-import org.apache.wink.json4j.JSONArray;
-/**
- * Base class for all transform encoders providing both a row and block
- * interface for decoding frames to matrices.
- *
+/*
+Interface for all Encoder like objects
*/
-public abstract class Encoder implements Externalizable
-{
- private static final long serialVersionUID = 2299156350718979064L;
- protected static final Log LOG =
LogFactory.getLog(Encoder.class.getName());
-
- protected int _clen = -1;
- protected int[] _colList = null;
-
- protected Encoder( int[] colList, int clen ) {
- _colList = colList;
- _clen = clen;
- }
-
- public int[] getColList() {
- return _colList;
- }
-
- public void setColList(int[] colList) {
- _colList = colList;
- }
-
- public int getNumCols() {
- return _clen;
- }
-
- public int initColList(JSONArray attrs) {
- _colList = new int[attrs.size()];
- for(int i=0; i < _colList.length; i++)
- _colList[i] = UtilFunctions.toInt(attrs.get(i));
- return _colList.length;
- }
-
- public int initColList(int[] colList) {
- _colList = colList;
- return _colList.length;
- }
-
- /**
- * Indicates if this encoder is applicable, i.e, if there is at
- * least one column to encode.
- *
- * @return true if at least one column to encode
- */
- public boolean isApplicable() {
- return (_colList != null && _colList.length > 0);
- }
- /**
- * Indicates if this encoder is applicable for the given column ID,
- * i.e., if it is subject to this transformation.
- *
- * @param colID column ID
- * @return true if encoder is applicable for given column
- */
- public int isApplicable(int colID) {
- if(_colList == null)
- return -1;
- int idx = Arrays.binarySearch(_colList, colID);
- return ( idx >= 0 ? idx : -1);
- }
+public interface Encoder extends Externalizable {
/**
- * Block encode: build and apply (transform encode).
+ * Build the transform meta data for the given block input. This call
modifies and keeps meta data as encoder state.
*
* @param in input frame block
- * @param out output matrix block
- * @return output matrix block
*/
- public abstract MatrixBlock encode(FrameBlock in, MatrixBlock out);
+ void build(FrameBlock in);
/**
- * Build the transform meta data for the given block input. This call
modifies
- * and keeps meta data as encoder state.
+ * Apply the generated metadata to the FrameBlock and saved the result
in out.
*
- * @param in input frame block
- */
- public abstract void build(FrameBlock in);
-
- /**
- * Allocates internal data structures for partial build.
- */
- public void prepareBuildPartial() {
- //do nothing
- }
-
- /**
- * Partial build of internal data structures (e.g., in distributed
spark operations).
- *
- * @param in input frame block
- */
- public void buildPartial(FrameBlock in) {
- //do nothing
- }
-
- /**
- * Encode input data blockwise according to existing transform meta
- * data (transform apply).
- *
- * @param in input frame block
- * @param out output matrix block
+ * @param in input frame block
+ * @param out output matrix block
+ * @param outputCol is a offset in the output matrix. column in
FrameBlock + outputCol = column in out
* @return output matrix block
*/
- public abstract MatrixBlock apply(FrameBlock in, MatrixBlock out);
-
- protected int[] subRangeColList(IndexRange ixRange) {
- List<Integer> cols = new ArrayList<>();
- for(int col : _colList) {
- if(ixRange.inColRange(col)) {
- // add the correct column, removed columns
before start
- // colStart - 1 because colStart is 1-based
- int corrColumn = (int) (col - (ixRange.colStart
- 1));
- cols.add(corrColumn);
- }
- }
- return cols.stream().mapToInt(i -> i).toArray();
- }
-
- /**
- * Returns a new Encoder that only handles a sub range of columns.
- *
- * @param ixRange the range (1-based, begin inclusive, end exclusive)
- * @return an encoder of the same type, just for the sub-range
- */
- public Encoder subRangeEncoder(IndexRange ixRange) {
- throw new DMLRuntimeException(
- this.getClass().getSimpleName() + " does not support
the creation of a sub-range encoder");
- }
-
- /**
- * Merges the column information, like how many columns the frame needs
and which columns this encoder operates on.
- *
- * @param other the other encoder of the same type
- * @param col column at which the second encoder will be merged in
(1-based)
- */
- protected void mergeColumnInfo(Encoder other, int col) {
- // update number of columns
- _clen = Math.max(_clen, col - 1 + other._clen);
-
- // update the new columns that this encoder operates on
- Set<Integer> colListAgg = new HashSet<>(); // for dedup
- for(int i : _colList)
- colListAgg.add(i);
- for(int i : other._colList)
- colListAgg.add(col - 1 + i);
- _colList = colListAgg.stream().mapToInt(i -> i).toArray();
- }
-
- /**
- * Merges another encoder, of a compatible type, in after a certain
position. Resizes as necessary.
- * <code>Encoders</code> are compatible with themselves and
<code>EncoderComposite</code> is compatible with every
- * other <code>Encoder</code>.
- *
- * @param other the encoder that should be merged in
- * @param row the row where it should be placed (1-based)
- * @param col the col where it should be placed (1-based)
- */
- public void mergeAt(Encoder other, int row, int col) {
- throw new DMLRuntimeException(
- this.getClass().getSimpleName() + " does not support
merging with " + other.getClass().getSimpleName());
- }
-
- /**
- * Update index-ranges to after encoding. Note that only Dummycoding
changes the ranges.
- *
- * @param beginDims begin dimensions of range
- * @param endDims end dimensions of range
- */
- public void updateIndexRanges(long[] beginDims, long[] endDims) {
- // do nothing - default
- }
+ MatrixBlock apply(FrameBlock in, MatrixBlock out, int outputCol);
/**
* Construct a frame block out of the transform meta data.
*
* @param out output frame block
* @return output frame block?
*/
- public abstract FrameBlock getMetaData(FrameBlock out);
+ FrameBlock getMetaData(FrameBlock out);
/**
* Sets up the required meta data for a subsequent call to apply.
*
* @param meta frame block
*/
- public abstract void initMetaData(FrameBlock meta);
+ void initMetaData(FrameBlock meta);
/**
- * Obtain the column mapping of encoded frames based on the passed
- * meta data frame.
- *
- * @param meta meta data frame block
- * @param out output matrix
- * @return matrix with column mapping (one row per attribute)
+ * Allocates internal data structures for partial build.
*/
- public MatrixBlock getColMapping(FrameBlock meta, MatrixBlock out) {
- //default: do nothing
- return out;
- }
+ void prepareBuildPartial();
/**
- * Redirects the default java serialization via externalizable to our
default
- * hadoop writable serialization for efficient broadcast/rdd
serialization.
+ * Partial build of internal data structures (e.g., in distributed
spark operations).
*
- * @param os object output
- * @throws IOException if IOException occurs
+ * @param in input frame block
*/
- @Override
- public void writeExternal(ObjectOutput os) throws IOException {
- os.writeInt(_clen);
- os.writeInt(_colList.length);
- for(int col : _colList)
- os.writeInt(col);
- }
+ void buildPartial(FrameBlock in);
/**
- * Redirects the default java serialization via externalizable to our
default
- * hadoop writable serialization for efficient broadcast/rdd
deserialization.
+ * Update index-ranges to after encoding. Note that only Dummycoding
changes the ranges.
*
- * @param in object input
- * @throws IOException if IOException occur
+ * @param beginDims begin dimensions of range
Review comment:
Replace the spaces with tabs (function definitions).
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]