gaoyunhaii commented on a change in pull request #27: URL: https://github.com/apache/flink-ml/pull/27#discussion_r749874117
########## File path: flink-ml-api/src/main/java/org/apache/flink/ml/distance/DistanceMeasure.java ########## @@ -0,0 +1,42 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.ml.distance; + +import org.apache.flink.ml.linalg.Vector; + +import java.io.Serializable; + +/** Interface for measuring distance between two vectors. */ +public interface DistanceMeasure extends Serializable { + + static DistanceMeasure getInstance(String distanceMeasure) { + if (distanceMeasure.equals("euclidean")) { + return EuclideanDistanceMeasure.getInstance(); + } + throw new IllegalArgumentException( Review comment: Would it be better to also give the supported options in the exception body ? ########## File path: flink-ml-lib/src/main/java/org/apache/flink/ml/common/datastream/MapPartitionFunctionWrapper.java ########## @@ -0,0 +1,79 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.ml.common.datastream; + +import org.apache.flink.api.common.functions.MapPartitionFunction; +import org.apache.flink.api.common.state.ListState; +import org.apache.flink.api.common.state.ListStateDescriptor; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.runtime.state.StateInitializationContext; +import org.apache.flink.streaming.api.operators.AbstractStreamOperator; +import org.apache.flink.streaming.api.operators.BoundedOneInput; +import org.apache.flink.streaming.api.operators.OneInputStreamOperator; +import org.apache.flink.streaming.runtime.streamrecord.StreamRecord; +import org.apache.flink.util.Collector; + +/** + * This utility class wraps a MapPartitionFunction into an OneInputStreamOperator so that a + * MapPartitionFunction can be applied on a DataStream via the DataStream::transform API. + * + * @param <IN> The class type of the input element. + * @param <OUT> The class type of the output element. 
+ */ +public class MapPartitionFunctionWrapper<IN, OUT> extends AbstractStreamOperator<OUT> + implements OneInputStreamOperator<IN, OUT>, BoundedOneInput { + private final ListStateDescriptor<IN> descriptor; + private final MapPartitionFunction<IN, OUT> mapPartitionFunc; + private ListState<IN> values; + + public MapPartitionFunctionWrapper( + TypeInformation<IN> typeInfo, MapPartitionFunction<IN, OUT> mapPartitionFunc) { + this.descriptor = new ListStateDescriptor<>("input", typeInfo); Review comment: Logically we do not need users to pass the `typeInfo` and we could create the descriptor in `initializeState` via ``` new ListStateDescriptor<>( "input", getOperatorConfig().getTypeSerializerIn(0, getClass().getClassLoader())) ``` ########## File path: flink-ml-api/src/main/java/org/apache/flink/ml/linalg/typeinfo/DenseVectorSerializer.java ########## @@ -0,0 +1,133 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. 
+ */ + +package org.apache.flink.ml.linalg.typeinfo; + +import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot; +import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot; +import org.apache.flink.api.common.typeutils.base.TypeSerializerSingleton; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.ml.linalg.DenseVector; + +import java.io.IOException; +import java.util.Arrays; + +/** Specialized serializer for {@code DenseVector}. */ +public final class DenseVectorSerializer extends TypeSerializerSingleton<DenseVector> { + + private static final long serialVersionUID = 1L; + + private static final double[] EMPTY = new double[0]; + + private static final DenseVectorSerializer INSTANCE = new DenseVectorSerializer(); + + @Override + public boolean isImmutableType() { + return false; + } + + @Override + public DenseVector createInstance() { + return new DenseVector(EMPTY); + } + + @Override + public DenseVector copy(DenseVector from) { + return new DenseVector(Arrays.copyOf(from.values, from.values.length)); + } + + @Override + public DenseVector copy(DenseVector from, DenseVector reuse) { + if (from.values.length == reuse.values.length) { + System.arraycopy(from.values, 0, reuse.values, 0, from.values.length); + return reuse; + } + return copy(from); + } + + @Override + public int getLength() { + return -1; + } + + @Override + public void serialize(DenseVector vector, DataOutputView target) throws IOException { + if (vector == null) { + throw new IllegalArgumentException("The vector must not be null."); + } + + final int len = vector.values.length; + target.writeInt(len); + for (int i = 0; i < len; i++) { + target.writeDouble(vector.get(i)); + } + } + + @Override + public DenseVector deserialize(DataInputView source) throws IOException { + int len = source.readInt(); + double[] values = new double[len]; + for (int i = 0; i < len; i++) { + values[i] = 
source.readDouble(); + } + return new DenseVector(values); + } + + @Override + public DenseVector deserialize(DenseVector reuse, DataInputView source) throws IOException { + int len = source.readInt(); + if (len == reuse.values.length) { + for (int i = 0; i < len; i++) { + reuse.values[i] = source.readDouble(); + } + return reuse; + } + + double[] values = new double[len]; Review comment: Is it possible to extract a private utility method for read double array? ########## File path: flink-ml-lib/src/main/java/org/apache/flink/ml/param/HasFeaturesCol.java ########## @@ -0,0 +1,33 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.ml.param; + +/** Interface for the shared featuresCol param. */ +public interface HasFeaturesCol<T> extends WithParams<T> { + Param<String> FEATURES_COL = new StringParam("featuresCol", "Features column name", "features"); + + default String getFeaturesCol() { Review comment: Similarly for this property. ########## File path: flink-ml-api/src/main/java/org/apache/flink/ml/linalg/Vectors.java ########## @@ -0,0 +1,28 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.ml.linalg; + +/** Utility methods for instantiating Vector. */ +public class Vectors { + + /** Creates a dense vector from its values. */ + public static DenseVector dense(double... value) { Review comment: `value` -> `values` ? ########## File path: flink-ml-lib/src/main/java/org/apache/flink/ml/param/HasPredictionCol.java ########## @@ -0,0 +1,34 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink.ml.param; + +/** Interface for the shared featuresCol param. 
*/ +public interface HasPredictionCol<T> extends WithParams<T> { + Param<String> PREDICTION_COL = + new StringParam("predictionCol", "Prediction column name", "prediction"); + + default String getPredictionCol() { + return get(PREDICTION_COL); Review comment: It seems that with `alwaysTrue` validation it is possible to have a null value. Is this expected? It seems we do not check nullability where we use the param. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
