zhipeng93 commented on code in PR #135:
URL: https://github.com/apache/flink-ml/pull/135#discussion_r935349000
##########
flink-ml-core/src/test/java/org/apache/flink/ml/api/StageTest.java:
##########
@@ -82,6 +85,9 @@ public class StageTest {
Param<Integer[]> INT_ARRAY_PARAM =
new IntArrayParam("intArrayParam", "Description", new
Integer[] {6, 7});
+ Param<Vector> VECTOR_PARAM =
Review Comment:
nit: How about moving `VECTOR_PARAM` to the end of the function, i.e., Line#121,
such that we test **all** the ArrayParams first and then the VectorParams?
The same applies to other similar cases in Python and Java.
##########
flink-ml-lib/src/main/java/org/apache/flink/ml/feature/elementwiseproduct/ElementwiseProduct.java:
##########
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.ml.feature.elementwiseproduct;
+
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.java.typeutils.RowTypeInfo;
+import org.apache.flink.ml.api.Transformer;
+import org.apache.flink.ml.common.datastream.TableUtils;
+import org.apache.flink.ml.linalg.BLAS;
+import org.apache.flink.ml.linalg.Vector;
+import org.apache.flink.ml.linalg.typeinfo.VectorTypeInfo;
+import org.apache.flink.ml.param.Param;
+import org.apache.flink.ml.util.ParamUtils;
+import org.apache.flink.ml.util.ReadWriteUtils;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.table.api.internal.TableImpl;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.commons.lang3.ArrayUtils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * An transformer that multiplies each input vector with a given scaling
vector using Hadamard
Review Comment:
nit: To be consistent with the existing Java docs, how about making it
`A Transformer that multiplies...`?
##########
flink-ml-lib/src/test/java/org/apache/flink/ml/feature/ElementwiseProductTest.java:
##########
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.ml.feature;
+
+import org.apache.flink.api.common.restartstrategy.RestartStrategies;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.ml.feature.elementwiseproduct.ElementwiseProduct;
+import org.apache.flink.ml.linalg.DenseVector;
+import org.apache.flink.ml.linalg.SparseVector;
+import org.apache.flink.ml.linalg.Vectors;
+import org.apache.flink.ml.util.TestUtils;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import
org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.test.util.AbstractTestBase;
+import org.apache.flink.types.Row;
+
+import org.apache.commons.collections.IteratorUtils;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/** Tests {@link ElementwiseProduct}. */
+public class ElementwiseProductTest extends AbstractTestBase {
+
+ private StreamTableEnvironment tEnv;
+ private Table inputDataTable;
+
+ private static final List<Row> INPUT_DATA =
+ Arrays.asList(
+ Row.of(
+ 0,
+ Vectors.dense(2.1, 3.1),
+ Vectors.sparse(5, new int[] {3}, new double[]
{1.0})),
+ Row.of(
+ 1,
+ Vectors.dense(1.1, 3.3),
+ Vectors.sparse(
+ 5, new int[] {4, 2, 3, 1}, new double[]
{4.0, 2.0, 3.0, 1.0})),
+ Row.of(2, null, null));
+
+ private static final double[] EXPECTED_OUTPUT_DENSE_VEC_ARRAY_1 = new
double[] {2.31, 3.41};
+ private static final double[] EXPECTED_OUTPUT_DENSE_VEC_ARRAY_2 = new
double[] {1.21, 3.63};
+
+ private static final int EXPECTED_OUTPUT_SPARSE_VEC_SIZE_1 = 5;
+ private static final int[] EXPECTED_OUTPUT_SPARSE_VEC_INDICES_1 = new
int[] {3};
+ private static final double[] EXPECTED_OUTPUT_SPARSE_VEC_VALUES_1 = new
double[] {0.0};
+
+ private static final int EXPECTED_OUTPUT_SPARSE_VEC_SIZE_2 = 5;
+ private static final int[] EXPECTED_OUTPUT_SPARSE_VEC_INDICES_2 = new
int[] {1, 2, 3, 4};
+ private static final double[] EXPECTED_OUTPUT_SPARSE_VEC_VALUES_2 =
+ new double[] {1.1, 0.0, 0.0, 0.0};
+
+ @Before
+ public void before() {
+ Configuration config = new Configuration();
+
config.set(ExecutionCheckpointingOptions.ENABLE_CHECKPOINTS_AFTER_TASKS_FINISH,
true);
+ StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment(config);
+ env.setParallelism(4);
+ env.enableCheckpointing(100);
+ env.setRestartStrategy(RestartStrategies.noRestart());
+ tEnv = StreamTableEnvironment.create(env);
+ DataStream<Row> dataStream = env.fromCollection(INPUT_DATA);
+ inputDataTable = tEnv.fromDataStream(dataStream).as("id", "vec",
"sparseVec");
+ }
+
+ private void verifyOutputResult(Table output, String outputCol, boolean
isSparse)
+ throws Exception {
+ DataStream<Row> dataStream = tEnv.toDataStream(output);
+ List<Row> results =
IteratorUtils.toList(dataStream.executeAndCollect());
+ assertEquals(3, results.size());
+ for (Row result : results) {
+ if (result.getField(0) == (Object) 0) {
+ if (isSparse) {
+ SparseVector sparseVector = (SparseVector)
result.getField(outputCol);
+ assertEquals(EXPECTED_OUTPUT_SPARSE_VEC_SIZE_1,
sparseVector.size());
+ assertArrayEquals(EXPECTED_OUTPUT_SPARSE_VEC_INDICES_1,
sparseVector.indices);
+ assertArrayEquals(
+ EXPECTED_OUTPUT_SPARSE_VEC_VALUES_1,
sparseVector.values, 1.0e-5);
+ } else {
+ assertArrayEquals(
+ EXPECTED_OUTPUT_DENSE_VEC_ARRAY_1,
+ ((DenseVector) result.getField(outputCol)).values,
+ 1.0e-5);
+ }
+ } else if (result.getField(0) == (Object) 1) {
+ if (isSparse) {
+ SparseVector sparseVector = (SparseVector)
result.getField(outputCol);
+ assertEquals(EXPECTED_OUTPUT_SPARSE_VEC_SIZE_2,
sparseVector.size());
+ assertArrayEquals(EXPECTED_OUTPUT_SPARSE_VEC_INDICES_2,
sparseVector.indices);
+ assertArrayEquals(
+ EXPECTED_OUTPUT_SPARSE_VEC_VALUES_2,
sparseVector.values, 1.0e-5);
+ } else {
+ assertArrayEquals(
+ EXPECTED_OUTPUT_DENSE_VEC_ARRAY_2,
+ ((DenseVector) result.getField(outputCol)).values,
+ 1.0e-5);
+ }
+ } else if (result.getField(0) == (Object) 2) {
+ assertNull(result.getField(outputCol));
+ } else {
+ throw new UnsupportedOperationException("Input data id not
exists.");
+ }
+ }
+ }
+
+ @Test
+ public void testParam() {
+ ElementwiseProduct elementwiseProduct = new ElementwiseProduct();
+ assertEquals("output", elementwiseProduct.getOutputCol());
+ assertEquals("input", elementwiseProduct.getInputCol());
+
+ elementwiseProduct
+ .setInputCol("vec")
+ .setOutputCol("outputVec")
+ .setScalingVec(Vectors.dense(1.0, 2.0, 3.0));
+ assertEquals("vec", elementwiseProduct.getInputCol());
+ assertEquals(Vectors.dense(1.0, 2.0, 3.0),
elementwiseProduct.getScalingVec());
+ assertEquals("outputVec", elementwiseProduct.getOutputCol());
+ }
+
+ @Test
+ public void testOutputSchema() {
+ ElementwiseProduct elementwiseProduct =
+ new ElementwiseProduct()
+ .setInputCol("vec")
+ .setOutputCol("outputVec")
+ .setScalingVec(Vectors.dense(1.0, 2.0, 3.0));
+ Table output = elementwiseProduct.transform(inputDataTable)[0];
+ assertEquals(
+ Arrays.asList("id", "vec", "sparseVec", "outputVec"),
+ output.getResolvedSchema().getColumnNames());
+ }
+
+ @Test
+ public void testSaveLoadAndTransformDense() throws Exception {
+ ElementwiseProduct elementwiseProduct =
+ new ElementwiseProduct()
+ .setInputCol("vec")
+ .setOutputCol("outputVec")
+ .setScalingVec(Vectors.dense(1.1, 1.1));
+ ElementwiseProduct loadedElementwiseProduct =
+ TestUtils.saveAndReload(
+ tEnv, elementwiseProduct,
TEMPORARY_FOLDER.newFolder().getAbsolutePath());
+ Table output = loadedElementwiseProduct.transform(inputDataTable)[0];
+ verifyOutputResult(output, loadedElementwiseProduct.getOutputCol(),
false);
+ }
+
+ @Test
+ public void testVectorSizeNotEquals() {
+ try {
+ ElementwiseProduct elementwiseProduct =
+ new ElementwiseProduct()
+ .setInputCol("vec")
+ .setOutputCol("outputVec")
+ .setScalingVec(Vectors.dense(1.1, 1.1, 2.0));
+ Table output = elementwiseProduct.transform(inputDataTable)[0];
+ DataStream<Row> dataStream = tEnv.toDataStream(output);
+ IteratorUtils.toList(dataStream.executeAndCollect());
+ Assert.fail("Expected IllegalArgumentException");
Review Comment:
It is not an IllegalArgumentException here; it is an IllegalState, and
execution never reaches this line.
How about replacing this line with `fail()`?
##########
flink-ml-lib/src/test/java/org/apache/flink/ml/feature/ElementwiseProductTest.java:
##########
@@ -0,0 +1,207 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.ml.feature;
+
+import org.apache.flink.api.common.restartstrategy.RestartStrategies;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.ml.feature.elementwiseproduct.ElementwiseProduct;
+import org.apache.flink.ml.linalg.DenseVector;
+import org.apache.flink.ml.linalg.SparseVector;
+import org.apache.flink.ml.linalg.Vectors;
+import org.apache.flink.ml.util.TestUtils;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import
org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.test.util.AbstractTestBase;
+import org.apache.flink.types.Row;
+
+import org.apache.commons.collections.IteratorUtils;
+import org.junit.Assert;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.util.Arrays;
+import java.util.List;
+
+import static org.junit.Assert.assertArrayEquals;
+import static org.junit.Assert.assertEquals;
+import static org.junit.Assert.assertNull;
+
+/** Tests {@link ElementwiseProduct}. */
+public class ElementwiseProductTest extends AbstractTestBase {
+
+ private StreamTableEnvironment tEnv;
+ private Table inputDataTable;
+
+ private static final List<Row> INPUT_DATA =
+ Arrays.asList(
+ Row.of(
+ 0,
+ Vectors.dense(2.1, 3.1),
+ Vectors.sparse(5, new int[] {3}, new double[]
{1.0})),
+ Row.of(
+ 1,
+ Vectors.dense(1.1, 3.3),
+ Vectors.sparse(
+ 5, new int[] {4, 2, 3, 1}, new double[]
{4.0, 2.0, 3.0, 1.0})),
+ Row.of(2, null, null));
+
+ private static final double[] EXPECTED_OUTPUT_DENSE_VEC_ARRAY_1 = new
double[] {2.31, 3.41};
+ private static final double[] EXPECTED_OUTPUT_DENSE_VEC_ARRAY_2 = new
double[] {1.21, 3.63};
+
+ private static final int EXPECTED_OUTPUT_SPARSE_VEC_SIZE_1 = 5;
+ private static final int[] EXPECTED_OUTPUT_SPARSE_VEC_INDICES_1 = new
int[] {3};
+ private static final double[] EXPECTED_OUTPUT_SPARSE_VEC_VALUES_1 = new
double[] {0.0};
+
+ private static final int EXPECTED_OUTPUT_SPARSE_VEC_SIZE_2 = 5;
+ private static final int[] EXPECTED_OUTPUT_SPARSE_VEC_INDICES_2 = new
int[] {1, 2, 3, 4};
+ private static final double[] EXPECTED_OUTPUT_SPARSE_VEC_VALUES_2 =
+ new double[] {1.1, 0.0, 0.0, 0.0};
+
+ @Before
+ public void before() {
+ Configuration config = new Configuration();
+
config.set(ExecutionCheckpointingOptions.ENABLE_CHECKPOINTS_AFTER_TASKS_FINISH,
true);
+ StreamExecutionEnvironment env =
StreamExecutionEnvironment.getExecutionEnvironment(config);
+ env.setParallelism(4);
+ env.enableCheckpointing(100);
+ env.setRestartStrategy(RestartStrategies.noRestart());
+ tEnv = StreamTableEnvironment.create(env);
+ DataStream<Row> dataStream = env.fromCollection(INPUT_DATA);
+ inputDataTable = tEnv.fromDataStream(dataStream).as("id", "vec",
"sparseVec");
+ }
+
+ private void verifyOutputResult(Table output, String outputCol, boolean
isSparse)
+ throws Exception {
+ DataStream<Row> dataStream = tEnv.toDataStream(output);
+ List<Row> results =
IteratorUtils.toList(dataStream.executeAndCollect());
+ assertEquals(3, results.size());
+ for (Row result : results) {
+ if (result.getField(0) == (Object) 0) {
+ if (isSparse) {
+ SparseVector sparseVector = (SparseVector)
result.getField(outputCol);
+ assertEquals(EXPECTED_OUTPUT_SPARSE_VEC_SIZE_1,
sparseVector.size());
+ assertArrayEquals(EXPECTED_OUTPUT_SPARSE_VEC_INDICES_1,
sparseVector.indices);
+ assertArrayEquals(
+ EXPECTED_OUTPUT_SPARSE_VEC_VALUES_1,
sparseVector.values, 1.0e-5);
+ } else {
+ assertArrayEquals(
+ EXPECTED_OUTPUT_DENSE_VEC_ARRAY_1,
+ ((DenseVector) result.getField(outputCol)).values,
+ 1.0e-5);
+ }
+ } else if (result.getField(0) == (Object) 1) {
+ if (isSparse) {
+ SparseVector sparseVector = (SparseVector)
result.getField(outputCol);
+ assertEquals(EXPECTED_OUTPUT_SPARSE_VEC_SIZE_2,
sparseVector.size());
+ assertArrayEquals(EXPECTED_OUTPUT_SPARSE_VEC_INDICES_2,
sparseVector.indices);
+ assertArrayEquals(
+ EXPECTED_OUTPUT_SPARSE_VEC_VALUES_2,
sparseVector.values, 1.0e-5);
+ } else {
+ assertArrayEquals(
+ EXPECTED_OUTPUT_DENSE_VEC_ARRAY_2,
+ ((DenseVector) result.getField(outputCol)).values,
+ 1.0e-5);
+ }
+ } else if (result.getField(0) == (Object) 2) {
+ assertNull(result.getField(outputCol));
+ } else {
+ throw new UnsupportedOperationException("Input data id not
exists.");
+ }
+ }
+ }
+
+ @Test
+ public void testParam() {
+ ElementwiseProduct elementwiseProduct = new ElementwiseProduct();
+ assertEquals("output", elementwiseProduct.getOutputCol());
+ assertEquals("input", elementwiseProduct.getInputCol());
+
+ elementwiseProduct
+ .setInputCol("vec")
+ .setOutputCol("outputVec")
+ .setScalingVec(Vectors.dense(1.0, 2.0, 3.0));
+ assertEquals("vec", elementwiseProduct.getInputCol());
+ assertEquals(Vectors.dense(1.0, 2.0, 3.0),
elementwiseProduct.getScalingVec());
+ assertEquals("outputVec", elementwiseProduct.getOutputCol());
+ }
+
+ @Test
+ public void testOutputSchema() {
+ ElementwiseProduct elementwiseProduct =
+ new ElementwiseProduct()
+ .setInputCol("vec")
+ .setOutputCol("outputVec")
+ .setScalingVec(Vectors.dense(1.0, 2.0, 3.0));
+ Table output = elementwiseProduct.transform(inputDataTable)[0];
+ assertEquals(
+ Arrays.asList("id", "vec", "sparseVec", "outputVec"),
+ output.getResolvedSchema().getColumnNames());
+ }
+
+ @Test
+ public void testSaveLoadAndTransformDense() throws Exception {
+ ElementwiseProduct elementwiseProduct =
+ new ElementwiseProduct()
+ .setInputCol("vec")
+ .setOutputCol("outputVec")
+ .setScalingVec(Vectors.dense(1.1, 1.1));
+ ElementwiseProduct loadedElementwiseProduct =
+ TestUtils.saveAndReload(
+ tEnv, elementwiseProduct,
TEMPORARY_FOLDER.newFolder().getAbsolutePath());
+ Table output = loadedElementwiseProduct.transform(inputDataTable)[0];
+ verifyOutputResult(output, loadedElementwiseProduct.getOutputCol(),
false);
+ }
+
+ @Test
+ public void testVectorSizeNotEquals() {
+ try {
+ ElementwiseProduct elementwiseProduct =
+ new ElementwiseProduct()
+ .setInputCol("vec")
+ .setOutputCol("outputVec")
+ .setScalingVec(Vectors.dense(1.1, 1.1, 2.0));
+ Table output = elementwiseProduct.transform(inputDataTable)[0];
+ DataStream<Row> dataStream = tEnv.toDataStream(output);
+ IteratorUtils.toList(dataStream.executeAndCollect());
+ Assert.fail("Expected IllegalArgumentException");
+ } catch (Exception e) {
+ assertEquals(
+ "Vector size mismatched.",
+
e.getCause().getCause().getCause().getCause().getCause().getMessage());
Review Comment:
How about using `ExceptionUtils.getRootCause(e).getMessage()`?
##########
flink-ml-lib/src/main/java/org/apache/flink/ml/feature/elementwiseproduct/ElementwiseProduct.java:
##########
@@ -0,0 +1,112 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.flink.ml.feature.elementwiseproduct;
+
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.java.typeutils.RowTypeInfo;
+import org.apache.flink.ml.api.Transformer;
+import org.apache.flink.ml.common.datastream.TableUtils;
+import org.apache.flink.ml.linalg.BLAS;
+import org.apache.flink.ml.linalg.Vector;
+import org.apache.flink.ml.linalg.typeinfo.VectorTypeInfo;
+import org.apache.flink.ml.param.Param;
+import org.apache.flink.ml.util.ParamUtils;
+import org.apache.flink.ml.util.ReadWriteUtils;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.table.api.internal.TableImpl;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Preconditions;
+
+import org.apache.commons.lang3.ArrayUtils;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * An transformer that multiplies each input vector with a given scaling
vector using Hadamard
+ * product.
+ *
+ * <p>If the size of the input vector does not equal the size of the scaling
vector, the transformer
+ * will throw {@link IllegalArgumentException}.
+ */
+public class ElementwiseProduct
+ implements Transformer<ElementwiseProduct>,
ElementwiseProductParams<ElementwiseProduct> {
+ private final Map<Param<?>, Object> paramMap = new HashMap<>();
+
+ public ElementwiseProduct() {
+ ParamUtils.initializeMapWithDefaultValues(paramMap, this);
+ }
+
+ @Override
+ public Table[] transform(Table... inputs) {
+ Preconditions.checkArgument(inputs.length == 1);
+ StreamTableEnvironment tEnv =
+ (StreamTableEnvironment) ((TableImpl)
inputs[0]).getTableEnvironment();
+ RowTypeInfo inputTypeInfo =
TableUtils.getRowTypeInfo(inputs[0].getResolvedSchema());
+ RowTypeInfo outputTypeInfo =
+ new RowTypeInfo(
+ ArrayUtils.addAll(inputTypeInfo.getFieldTypes(),
VectorTypeInfo.INSTANCE),
+ ArrayUtils.addAll(inputTypeInfo.getFieldNames(),
getOutputCol()));
+ DataStream<Row> output =
+ tEnv.toDataStream(inputs[0])
+ .map(
+ new ElementwiseProductFunction(getInputCol(),
getScalingVec()),
+ outputTypeInfo);
+ Table outputTable = tEnv.fromDataStream(output);
+ return new Table[] {outputTable};
+ }
+
+ private static class ElementwiseProductFunction implements
MapFunction<Row, Row> {
+ private final String inputCol;
+ private final Vector scalingVec;
+
+ public ElementwiseProductFunction(String inputCol, Vector scalingVec) {
+ this.inputCol = inputCol;
+ this.scalingVec = scalingVec;
+ }
+
+ @Override
+ public Row map(Row value) {
+ Vector inputVec = value.getFieldAs(inputCol);
+ Vector retVec = (null != inputVec) ? inputVec.clone() : null;
Review Comment:
How about checking the sizes of the inputVec and the scaling vec before
conducting `hdot`? Then we can throw an IllegalArgumentException here, which
would make the exception clearer.
##########
flink-ml-python/pyflink/ml/lib/feature/elementwiseproduct.py:
##########
@@ -0,0 +1,73 @@
+################################################################################
+# Licensed to the Apache Software Foundation (ASF) under one
+# or more contributor license agreements. See the NOTICE file
+# distributed with this work for additional information
+# regarding copyright ownership. The ASF licenses this file
+# to you under the Apache License, Version 2.0 (the
+# "License"); you may not use this file except in compliance
+# with the License. You may obtain a copy of the License at
+#
+# http://www.apache.org/licenses/LICENSE-2.0
+#
+# Unless required by applicable law or agreed to in writing, software
+# distributed under the License is distributed on an "AS IS" BASIS,
+# WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+# See the License for the specific language governing permissions and
+# limitations under the License.
+################################################################################
+
+from pyflink.ml.core.param import ParamValidators, Param, VectorParam
+from pyflink.ml.core.wrapper import JavaWithParams
+from pyflink.ml.lib.feature.common import JavaFeatureTransformer
+from pyflink.ml.lib.param import HasInputCol, HasOutputCol
+from pyflink.ml.core.linalg import Vector
+
+
+class _ElementwiseProductParams(
+ JavaWithParams,
+ HasInputCol,
+ HasOutputCol
+):
+ """
+ Params for :class:`ElementwiseProduct`.
+ """
+
+ SCALING_VEC: Param[Vector] = VectorParam(
+ "scaling_vec",
+ "the scaling vector to multiply with input vectors using hadamard
product.",
+ None,
+ ParamValidators.not_null())
+
+ def __init__(self, java_params):
+ super(_ElementwiseProductParams, self).__init__(java_params)
+
+ def set_scaling_vec(self, value: Vector):
+ return self.set(self.SCALING_VEC, value)
+
+ def get_scaling_vec(self) -> Vector:
+ return self.get(self.SCALING_VEC)
+
+ @property
+ def scaling_vec(self) -> Vector:
+ return self.get_scaling_vec()
+
+
+class ElementwiseProduct(JavaFeatureTransformer, _ElementwiseProductParams):
+ """
+ ElementwiseProduct is a transformer that multiplies each input vector with
a
Review Comment:
nit: make the Python doc consistent with the Java docs.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]