weibozhao commented on a change in pull request #24:
URL: https://github.com/apache/flink-ml/pull/24#discussion_r762977739
##########
File path:
flink-ml-lib/src/test/java/org/apache/flink/ml/classification/knn/KnnTest.java
##########
@@ -0,0 +1,285 @@
+package org.apache.flink.ml.classification.knn;
+
+import org.apache.flink.api.common.restartstrategy.RestartStrategies;
+import org.apache.flink.configuration.Configuration;
+import org.apache.flink.ml.api.Stage;
+import org.apache.flink.ml.builder.Pipeline;
+import org.apache.flink.ml.common.param.HasFeaturesCol;
+import org.apache.flink.ml.common.param.HasK;
+import org.apache.flink.ml.common.param.HasLabelCol;
+import org.apache.flink.ml.common.param.HasPredictionCol;
+import org.apache.flink.ml.linalg.DenseVector;
+import org.apache.flink.ml.linalg.Vectors;
+import org.apache.flink.ml.param.Param;
+import org.apache.flink.streaming.api.datastream.DataStream;
+import
org.apache.flink.streaming.api.environment.ExecutionCheckpointingOptions;
+import org.apache.flink.streaming.api.environment.StreamExecutionEnvironment;
+import org.apache.flink.table.api.DataTypes;
+import org.apache.flink.table.api.Schema;
+import org.apache.flink.table.api.Table;
+import org.apache.flink.table.api.bridge.java.StreamTableEnvironment;
+import org.apache.flink.types.Row;
+
+import org.apache.commons.collections.IteratorUtils;
+import org.junit.Before;
+import org.junit.Test;
+
+import java.nio.file.Files;
+import java.util.ArrayList;
+import java.util.Arrays;
+import java.util.HashMap;
+import java.util.List;
+import java.util.Map;
+
+import static org.junit.Assert.assertEquals;
+
+/** knn algorithm test. */
+public class KnnTest {
+ private StreamExecutionEnvironment env;
+ private StreamTableEnvironment tEnv;
+ private Table trainData;
+ private Table testData;
+
+ /** Training rows: a string label followed by a two-dimensional dense feature vector. */
+ List<Row> trainArray =
+     new ArrayList<>(
+         Arrays.asList(
+             Row.of("f", Vectors.dense(2.0, 3.0)),
+             Row.of("f", Vectors.dense(2.1, 3.1)),
+             Row.of("m", Vectors.dense(200.1, 300.1)),
+             Row.of("m", Vectors.dense(200.2, 300.2)),
+             Row.of("m", Vectors.dense(200.3, 300.3)),
+             Row.of("m", Vectors.dense(200.4, 300.4)),
+             Row.of("m", Vectors.dense(200.4, 300.4)),
+             Row.of("m", Vectors.dense(200.6, 300.6)),
+             Row.of("f", Vectors.dense(2.1, 3.1)),
+             Row.of("f", Vectors.dense(2.1, 3.1)),
+             Row.of("f", Vectors.dense(2.1, 3.1)),
+             Row.of("f", Vectors.dense(2.1, 3.1)),
+             Row.of("f", Vectors.dense(2.3, 3.2)),
+             Row.of("f", Vectors.dense(2.3, 3.2)),
+             Row.of("c", Vectors.dense(2.8, 3.2)),
+             Row.of("d", Vectors.dense(300., 3.2)),
+             Row.of("f", Vectors.dense(2.2, 3.2)),
+             Row.of("e", Vectors.dense(2.4, 3.2)),
+             Row.of("e", Vectors.dense(2.5, 3.2)),
+             Row.of("e", Vectors.dense(2.5, 3.2)),
+             Row.of("f", Vectors.dense(2.1, 3.1))));
+
+ /** Rows to predict on; each one holds a single dense feature vector and no label. */
+ List<Row> testArray =
+     new ArrayList<>(
+         Arrays.asList(
+             Row.of(Vectors.dense(4.0, 4.1)),
+             Row.of(Vectors.dense(300, 42))));
+
+ /** Expected (label, feature vector) pairs, one per vector listed in testArray. */
+ Row[] expectedData =
+     new Row[] {
+         Row.of("e", Vectors.dense(4.0, 4.1)),
+         Row.of("m", Vectors.dense(300, 42))
+     };
+
+ /**
+  * Creates the stream and table environments and converts the in-memory training and test
+  * rows into the tables used by the tests.
+  */
+ @Before
+ public void before() {
+     Configuration conf = new Configuration();
+     conf.set(ExecutionCheckpointingOptions.ENABLE_CHECKPOINTS_AFTER_TASKS_FINISH, true);
+
+     env = StreamExecutionEnvironment.getExecutionEnvironment(conf);
+     env.setParallelism(4);
+     env.enableCheckpointing(100);
+     env.setRestartStrategy(RestartStrategies.noRestart());
+     tEnv = StreamTableEnvironment.create(env);
+
+     // Training table: string column f0 and vector column f1, renamed to (label, vec).
+     Schema trainSchema =
+         Schema.newBuilder()
+             .column("f0", DataTypes.STRING())
+             .column("f1", DataTypes.of(DenseVector.class))
+             .build();
+     trainData = tEnv.fromDataStream(env.fromCollection(trainArray), trainSchema).as("label, vec");
+
+     // Test table: a single vector column f0, renamed to vec.
+     Schema testSchema =
+         Schema.newBuilder().column("f0", DataTypes.of(DenseVector.class)).build();
+     testData = tEnv.fromDataStream(env.fromCollection(testArray), testSchema).as("vec");
+ }
+
+ /**
+  * Fits a Knn estimator configured through its setters on the training table, transforms the
+  * test table, and checks that every output row whose field 0 equals an expected feature
+  * vector carries the expected label in field 1.
+  */
+ @Test
+ public void testFitAntTransform() throws Exception {
+     Knn knn =
+         new Knn()
+             .setLabelCol("label")
+             .setFeaturesCol("vec")
+             .setK(4)
+             .setPredictionCol("pred");
+
+     KnnModel knnModel = knn.fit(trainData);
+     Table result = knnModel.transform(testData)[0];
+
+     DataStream<Row> output = tEnv.toDataStream(result);
+
+     List<Row> rows = IteratorUtils.toList(output.executeAndCollect());
+     for (Row value : rows) {
+         for (Row exp : expectedData) {
+             // Use a JUnit assertion rather than the assert keyword: the keyword is a no-op
+             // when the JVM runs with assertions disabled, which would let the test pass
+             // without checking anything.
+             if (exp.getField(1).equals(value.getField(0))) {
+                 assertEquals(exp.getField(0), value.getField(1));
+             }
+         }
+     }
+ }
+
+ /**
+  * Builds a Knn estimator from a parameter map instead of setters, then fits it on the
+  * training table, transforms the test table, and checks that every output row whose field 0
+  * equals an expected feature vector carries the expected label in field 1.
+  */
+ @Test
+ public void testParamsConstructor() throws Exception {
+     Map<Param<?>, Object> params = new HashMap<>();
+     params.put(HasLabelCol.LABEL_COL, "label");
+     params.put(HasFeaturesCol.FEATURES_COL, "vec");
+     params.put(HasK.K, 4);
+     params.put(HasPredictionCol.PREDICTION_COL, "pred");
+     Knn knn = new Knn(params);
+
+     KnnModel knnModel = knn.fit(trainData);
+     Table result = knnModel.transform(testData)[0];
+
+     DataStream<Row> output = tEnv.toDataStream(result);
+
+     List<Row> rows = IteratorUtils.toList(output.executeAndCollect());
+     for (Row value : rows) {
+         for (Row exp : expectedData) {
+             // Use a JUnit assertion rather than the assert keyword: the keyword is a no-op
+             // when the JVM runs with assertions disabled, which would let the test pass
+             // without checking anything.
+             if (exp.getField(1).equals(value.getField(0))) {
+                 assertEquals(exp.getField(0), value.getField(1));
+             }
+         }
+     }
+ }
+
+ /** test knn as a pipeline stage. */
+ @Test
+ public void testPipeline() throws Exception {
+ Knn knn =
+ new Knn()
+ .setLabelCol("label")
+ .setFeaturesCol("vec")
+ .setK(4)
+ .setPredictionCol("pred");
+
+ List<Stage<?>> stages = new ArrayList<>();
+ stages.add(knn);
+
+ Pipeline pipe = new Pipeline(stages);
Review comment:
This UT (unit test) has been deleted.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]