weibozhao commented on a change in pull request #24:
URL: https://github.com/apache/flink-ml/pull/24#discussion_r747282532
##########
File path:
flink-ml-lib/src/main/java/org/apache/flink/ml/algo/batch/knn/KnnTrainBatchOp.java
##########
@@ -0,0 +1,230 @@
+package org.apache.flink.ml.algo.batch.knn;
+
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.functions.RichMapPartitionFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeinfo.Types;
+import org.apache.flink.api.java.typeutils.RowTypeInfo;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistance;
+import org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistanceData;
+import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceMatrixData;
+import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceSparseData;
+import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceVectorData;
+import org.apache.flink.ml.common.BatchOperator;
+import org.apache.flink.ml.common.MapPartitionFunctionWrapper;
+import org.apache.flink.ml.common.linalg.DenseVector;
+import org.apache.flink.ml.common.linalg.VectorUtil;
+import org.apache.flink.ml.param.Param;
+import org.apache.flink.ml.param.StringParam;
+import org.apache.flink.ml.params.knn.HasKnnDistanceType;
+import org.apache.flink.ml.params.knn.KnnTrainParams;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.table.api.internal.TableImpl;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.Preconditions;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static
org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistanceData.pGson;
+
+/**
+ * KNN is to classify unlabeled observations by assigning them to the class of
the most similar
+ * labeled examples. Note that though there is no ``training process`` in KNN,
we create a ``fake
+ * one`` to use in pipeline model. In this operator, we do some preparation to
speed up the
+ * inference process.
+ */
+public final class KnnTrainBatchOp extends BatchOperator<KnnTrainBatchOp>
Review comment:
KnnTrainBatchOp is the kernel of KnnClassifier. The reason we introduce
BatchOp is to tell the algorithm developer that they are working in the batch
environment mode, so everything they do in this environment is a batch action.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]