weibozhao commented on a change in pull request #24:
URL: https://github.com/apache/flink-ml/pull/24#discussion_r760919513
##########
File path:
flink-ml-lib/src/main/java/org/apache/flink/ml/classification/knn/KnnModel.java
##########
@@ -0,0 +1,489 @@
+package org.apache.flink.ml.classification.knn;
+
+import org.apache.flink.api.common.eventtime.WatermarkStrategy;
+import org.apache.flink.api.common.functions.RichMapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.connector.source.Source;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.api.java.typeutils.RowTypeInfo;
+import org.apache.flink.connector.file.sink.FileSink;
+import org.apache.flink.connector.file.src.FileSource;
+import org.apache.flink.core.fs.Path;
+import org.apache.flink.ml.api.Model;
+import org.apache.flink.ml.common.broadcast.BroadcastUtils;
+import org.apache.flink.ml.linalg.DenseMatrix;
+import org.apache.flink.ml.linalg.DenseVector;
+import org.apache.flink.ml.linalg.VectorUtils;
+import org.apache.flink.ml.param.Param;
+import org.apache.flink.ml.util.ParamUtils;
+import org.apache.flink.ml.util.ReadWriteUtils;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner;
+import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
+import org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator;
+import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
+import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.Schema;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.table.api.internal.TableImpl;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.utils.LogicalTypeParser;
+import org.apache.flink.table.types.utils.LogicalTypeDataTypeConverter;
+import org.apache.flink.types.Row;
+
+import org.apache.flink.shaded.curator4.com.google.common.collect.ImmutableMap;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
+
+import org.apache.commons.lang3.ArrayUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.TreeMap;
+import java.util.function.Function;
+
+/** Knn classification model fitted by estimator. */
+public class KnnModel implements Model<KnnModel>, KnnParams<KnnModel> {
+ protected Map<Param<?>, Object> params = new HashMap<>();
+ private Table[] modelData;
+
+ /** constructor. */
+ public KnnModel() {
+ ParamUtils.initializeMapWithDefaultValues(params, this);
+ }
+
+ /**
+ * constructor.
+ *
+ * @param params parameters for algorithm.
+ */
+ public KnnModel(Map<Param<?>, Object> params) {
+ this.params = params;
+ }
+
+ /**
+ * Set model data for knn prediction.
+ *
+ * @param modelData knn model.
+ * @return knn model.
+ */
+ @Override
+ public KnnModel setModelData(Table... modelData) {
+ this.modelData = modelData;
+ return this;
+ }
+
+ /**
+ * get model data.
+ *
+ * @return list of tables.
+ */
+ @Override
+ public Table[] getModelData() {
+ return modelData;
+ }
+
+ /**
+ * @param inputs a list of tables.
+ * @return result.
+ */
+ @Override
+ public Table[] transform(Table... inputs) {
+ StreamTableEnvironment tEnv =
+ (StreamTableEnvironment) ((TableImpl) inputs[0]).getTableEnvironment();
+ DataStream<Row> input = tEnv.toDataStream(inputs[0]);
+ DataStream<Row> model = tEnv.toDataStream(modelData[0]);
+ final String BROADCAST_STR = "broadcastModelKey";
+ Map<String, DataStream<?>> broadcastMap = new HashMap<>(1);
+ broadcastMap.put(BROADCAST_STR, model);
+ ResolvedSchema modelSchema = modelData[0].getResolvedSchema();
+ DataType idType =
+ modelSchema.getColumnDataTypes().get(modelSchema.getColumnNames().size() - 1);
+ String[] reservedCols =
+ inputs[0].getResolvedSchema().getColumnNames().toArray(new String[0]);
+ DataType[] reservedTypes =
+ inputs[0].getResolvedSchema().getColumnDataTypes().toArray(new DataType[0]);
+ String[] resultCols = new String[] {(String) params.get(KnnParams.PREDICTION_COL)};
+ DataType[] resultTypes = new DataType[] {idType};
+ ResolvedSchema outputSchema =
+ ResolvedSchema.physical(
+ ArrayUtils.addAll(reservedCols, resultCols),
+ ArrayUtils.addAll(reservedTypes, resultTypes));
Review comment:
done
##########
File path:
flink-ml-lib/src/main/java/org/apache/flink/ml/classification/knn/KnnModel.java
##########
@@ -0,0 +1,489 @@
+package org.apache.flink.ml.classification.knn;
+
+import org.apache.flink.api.common.eventtime.WatermarkStrategy;
+import org.apache.flink.api.common.functions.RichMapFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.connector.source.Source;
+import org.apache.flink.api.java.tuple.Tuple2;
+import org.apache.flink.api.java.typeutils.RowTypeInfo;
+import org.apache.flink.connector.file.sink.FileSink;
+import org.apache.flink.connector.file.src.FileSource;
+import org.apache.flink.core.fs.Path;
+import org.apache.flink.ml.api.Model;
+import org.apache.flink.ml.common.broadcast.BroadcastUtils;
+import org.apache.flink.ml.linalg.DenseMatrix;
+import org.apache.flink.ml.linalg.DenseVector;
+import org.apache.flink.ml.linalg.VectorUtils;
+import org.apache.flink.ml.param.Param;
+import org.apache.flink.ml.util.ParamUtils;
+import org.apache.flink.ml.util.ReadWriteUtils;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.streaming.api.functions.sink.filesystem.bucketassigners.BasePathBucketAssigner;
+import org.apache.flink.streaming.api.functions.sink.filesystem.rollingpolicies.OnCheckpointRollingPolicy;
+import org.apache.flink.streaming.api.operators.AbstractUdfStreamOperator;
+import org.apache.flink.streaming.api.operators.OneInputStreamOperator;
+import org.apache.flink.streaming.runtime.streamrecord.StreamRecord;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.Schema;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.table.api.internal.TableImpl;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.table.types.logical.utils.LogicalTypeParser;
+import org.apache.flink.table.types.utils.LogicalTypeDataTypeConverter;
+import org.apache.flink.types.Row;
+
+import org.apache.flink.shaded.curator4.com.google.common.collect.ImmutableMap;
+import org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
+
+import org.apache.commons.lang3.ArrayUtils;
+
+import java.io.IOException;
+import java.util.ArrayList;
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+import java.util.PriorityQueue;
+import java.util.TreeMap;
+import java.util.function.Function;
+
+/** Knn classification model fitted by estimator. */
+public class KnnModel implements Model<KnnModel>, KnnParams<KnnModel> {
+ protected Map<Param<?>, Object> params = new HashMap<>();
+ private Table[] modelData;
+
+ /** constructor. */
+ public KnnModel() {
+ ParamUtils.initializeMapWithDefaultValues(params, this);
+ }
+
+ /**
+ * constructor.
+ *
+ * @param params parameters for algorithm.
+ */
+ public KnnModel(Map<Param<?>, Object> params) {
+ this.params = params;
+ }
+
+ /**
+ * Set model data for knn prediction.
+ *
+ * @param modelData knn model.
+ * @return knn model.
+ */
+ @Override
+ public KnnModel setModelData(Table... modelData) {
+ this.modelData = modelData;
+ return this;
+ }
+
+ /**
+ * get model data.
+ *
+ * @return list of tables.
+ */
+ @Override
+ public Table[] getModelData() {
+ return modelData;
+ }
+
+ /**
+ * @param inputs a list of tables.
+ * @return result.
+ */
+ @Override
+ public Table[] transform(Table... inputs) {
+ StreamTableEnvironment tEnv =
+ (StreamTableEnvironment) ((TableImpl) inputs[0]).getTableEnvironment();
+ DataStream<Row> input = tEnv.toDataStream(inputs[0]);
+ DataStream<Row> model = tEnv.toDataStream(modelData[0]);
+ final String BROADCAST_STR = "broadcastModelKey";
+ Map<String, DataStream<?>> broadcastMap = new HashMap<>(1);
+ broadcastMap.put(BROADCAST_STR, model);
+ ResolvedSchema modelSchema = modelData[0].getResolvedSchema();
+ DataType idType =
+ modelSchema.getColumnDataTypes().get(modelSchema.getColumnNames().size() - 1);
+ String[] reservedCols =
+ inputs[0].getResolvedSchema().getColumnNames().toArray(new String[0]);
+ DataType[] reservedTypes =
+ inputs[0].getResolvedSchema().getColumnDataTypes().toArray(new DataType[0]);
+ String[] resultCols = new String[] {(String) params.get(KnnParams.PREDICTION_COL)};
+ DataType[] resultTypes = new DataType[] {idType};
+ ResolvedSchema outputSchema =
+ ResolvedSchema.physical(
+ ArrayUtils.addAll(reservedCols, resultCols),
+ ArrayUtils.addAll(reservedTypes, resultTypes));
+
+ DataType[] dataTypes = outputSchema.getColumnDataTypes().toArray(new DataType[0]);
+ TypeInformation<?>[] typeInformations = new TypeInformation[dataTypes.length];
+
+ for (int i = 0; i < dataTypes.length; ++i) {
+ typeInformations[i] = TypeInformation.of(dataTypes[i].getLogicalType().getClass());
+ }
+
+ Function<List<DataStream<?>>, DataStream<Row>> function =
+ dataStreams -> {
+ DataStream stream = dataStreams.get(0);
+ return stream.transform(
+ "mapFunc",
+ new RowTypeInfo(
+ typeInformations,
+ outputSchema.getColumnNames().toArray(new String[0])),
+ new PredictOperator(
Review comment:
done
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]