weibozhao commented on a change in pull request #24:
URL: https://github.com/apache/flink-ml/pull/24#discussion_r762976749



##########
File path: 
flink-ml-lib/src/main/java/org/apache/flink/ml/classification/knn/FastDistanceMatrixData.java
##########
@@ -0,0 +1,122 @@
+package org.apache.flink.ml.classification.knn;
+
+import org.apache.flink.ml.linalg.DenseMatrix;
+import org.apache.flink.ml.util.ReadWriteUtils;
+import org.apache.flink.util.Preconditions;
+
+import 
org.apache.flink.shaded.jackson2.com.fasterxml.jackson.core.JsonProcessingException;
+
+import java.io.Serializable;
+import java.util.HashMap;
+import java.util.Map;
+
+/**
+ * Save the data for calculating distance fast. The FastDistanceMatrixData 
saves several dense
+ * vectors in a single matrix. The vectors are organized in columns, which 
means each column is a
+ * single vector. For example, vec1: 0,1,2, vec2: 3,4,5, vec3: 6,7,8, then the 
data in matrix is
+ * organized as: vec1,vec2,vec3. And the data array in <code>vectors</code> is 
{0,1,2,3,4,5,6,7,8}.
+ */
+public class FastDistanceMatrixData implements Serializable {
+
+    /**
+     * Stores several dense vectors in columns. For example, if the vectorSize 
is n, and matrix
+     * saves m vectors, then the number of rows of <code>vectors</code> is n 
and the number of cols
+     * of <code>vectors</code> is m.
+     */
+    public final DenseMatrix vectors;
+    /**
+     * Saves the extra info besides the vectors. Each vector is related to one entry. Thus, for
+     * FastDistanceVectorData, there is a single entry. And for
+     * FastDistanceMatrixData, the length of <code>ids</code> is equal to the number of cols of
+     * <code>vectors</code>. Besides, the order of the ids is the same as that of the vectors.
+     */
+    public final String[] ids;
+
+    /**
+     * Stores some extra info extracted from the vector. It's also organized 
in columns. For
+     * example, if we want to save the L1 norm and L2 norm of the vector, then 
the two values are
+     * viewed as a two-dimension label vector. We organize the norm vectors 
together to get the
+     * <code>label</code>. If the number of cols of <code>vectors</code> is m, 
then in this case the
+     * dimension of <code>label</code> is 2 * m.
+     */
+    public DenseMatrix label;
+
+    public String[] getIds() {
+        return ids;
+    }
+
+    /**
+     * Constructor, initialize the vector data and extra info.
+     *
+     * @param vectors DenseMatrix which saves vectors in columns.
+     * @param ids extra info besides the vector.
+     */
+    public FastDistanceMatrixData(DenseMatrix vectors, String[] ids) {
+        this.ids = ids;
+        Preconditions.checkNotNull(vectors, "DenseMatrix should not be null!");
+        if (null != ids) {
+            Preconditions.checkArgument(
+                    vectors.numCols() == ids.length,
+                    "The column number of DenseMatrix must be equal to the 
rows array length!");
+        }
+        this.vectors = vectors;
+    }
+
+    /**
+     * Serializes this FastDistanceMatrixData.
+     *
+     * @return json string.
+     */
+    @Override
+    public String toString() {

Review comment:
       This class has been deleted.




-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


Reply via email to