weibozhao commented on a change in pull request #24:
URL: https://github.com/apache/flink-ml/pull/24#discussion_r747330413
##########
File path:
flink-ml-lib/src/main/java/org/apache/flink/ml/algo/batch/knn/KnnTrainBatchOp.java
##########
@@ -0,0 +1,230 @@
+package org.apache.flink.ml.algo.batch.knn;
+
+import org.apache.flink.api.common.functions.MapFunction;
+import org.apache.flink.api.common.functions.RichMapPartitionFunction;
+import org.apache.flink.api.common.typeinfo.TypeInformation;
+import org.apache.flink.api.common.typeinfo.Types;
+import org.apache.flink.api.java.typeutils.RowTypeInfo;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistance;
+import org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistanceData;
+import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceMatrixData;
+import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceSparseData;
+import org.apache.flink.ml.algo.batch.knn.distance.FastDistanceVectorData;
+import org.apache.flink.ml.common.BatchOperator;
+import org.apache.flink.ml.common.MapPartitionFunctionWrapper;
+import org.apache.flink.ml.common.linalg.DenseVector;
+import org.apache.flink.ml.common.linalg.VectorUtil;
+import org.apache.flink.ml.param.Param;
+import org.apache.flink.ml.param.StringParam;
+import org.apache.flink.ml.params.knn.HasKnnDistanceType;
+import org.apache.flink.ml.params.knn.KnnTrainParams;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.table.api.internal.TableImpl;
+import org.apache.flink.table.catalog.ResolvedSchema;
+import org.apache.flink.table.types.DataType;
+import org.apache.flink.types.Row;
+import org.apache.flink.util.Collector;
+import org.apache.flink.util.Preconditions;
+
+import java.io.IOException;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static
org.apache.flink.ml.algo.batch.knn.distance.BaseFastDistanceData.pGson;
+
+/**
+ * KNN is to classify unlabeled observations by assigning them to the class of
the most similar
+ * labeled examples. Note that though there is no ``training process`` in KNN,
we create a ``fake
+ * one`` to use in pipeline model. In this operator, we do some preparation to
speed up the
+ * inference process.
+ */
+public final class KnnTrainBatchOp extends BatchOperator<KnnTrainBatchOp>
Review comment:
in the estimator, developer need to think the input table is a bounded
stream (batch) or unbounded stream. That is why we introduce the concept of
batchOp and streamOp
we want to give the batch algo developer a clean env to develop batch algo.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]