lindong28 commented on code in PR #110: URL: https://github.com/apache/flink-ml/pull/110#discussion_r901606296
########## flink-ml-core/src/main/java/org/apache/flink/ml/linalg/VectorWithNorm.java: ########## @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.flink.ml.linalg; + +import org.apache.flink.api.common.typeinfo.TypeInfo; +import org.apache.flink.ml.linalg.typeinfo.VectorWithNormTypeInfoFactory; + +/** A vector with its norm. */ +@TypeInfo(VectorWithNormTypeInfoFactory.class) +public class VectorWithNorm { + private final Vector vector; + + public double l2Norm; Review Comment: This variable can be final. ########## flink-ml-core/src/main/java/org/apache/flink/ml/linalg/typeinfo/VectorWithNormSerializer.java: ########## @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.flink.ml.linalg.typeinfo; + +import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.ml.linalg.DenseVector; +import org.apache.flink.ml.linalg.SparseVector; +import org.apache.flink.ml.linalg.Vector; +import org.apache.flink.ml.linalg.VectorWithNorm; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; + +/** Specialized serializer for {@link VectorWithNorm}. */ +public class VectorWithNormSerializer extends TypeSerializer<VectorWithNorm> { + private final DenseVectorSerializer denseVectorSerializer = new DenseVectorSerializer(); + + private static final SparseVectorSerializer sparseVectorSerializer = + SparseVectorSerializer.INSTANCE; + + private static final long serialVersionUID = 1L; + + private static final double[] EMPTY = new double[0]; + + @Override + public boolean isImmutableType() { + return false; + } + + @Override + public TypeSerializer<VectorWithNorm> duplicate() { + return new VectorWithNormSerializer(); + } + + @Override + public VectorWithNorm createInstance() { + return new VectorWithNorm(new DenseVector(EMPTY)); + } + + @Override + public VectorWithNorm copy(VectorWithNorm from) { + Vector vector; + if (from.getVector() instanceof DenseVector) { + vector = denseVectorSerializer.copy((DenseVector) from.getVector()); + } else { + vector = sparseVectorSerializer.copy((SparseVector) from.getVector()); + } + + return new VectorWithNorm(vector, from.getL2Norm()); + } + + @Override + public VectorWithNorm copy(VectorWithNorm from, VectorWithNorm reuse) { + Vector vector; + if (from.getVector() instanceof DenseVector && reuse.getVector() instanceof DenseVector) { + vector = + denseVectorSerializer.copy( + (DenseVector) from.getVector(), (DenseVector) reuse.getVector()); + } else if (from.getVector() instanceof SparseVector + && reuse.getVector() instanceof SparseVector) { + vector = + sparseVectorSerializer.copy( + (SparseVector) from.getVector(), (SparseVector) reuse.getVector()); + } else { + vector = from.getVector().clone(); + } + + if (vector == reuse.getVector()) { + reuse.l2Norm = from.l2Norm; Review Comment: Suppose we have already re-used the vector inside the `vectorWithNorm`, I am not sure we can get an extra non-trivial performance improvement by re-using the `vectorWithNorm` object itself. It might be better to keep the code simpler (e.g. let `l2Norm` by public final and remove the `if` statement here) if re-using the `vectorWithNorm` have a negligible impact on performance. ########## flink-ml-core/src/main/java/org/apache/flink/ml/linalg/typeinfo/VectorWithNormTypeInfo.java: ########## @@ -0,0 +1,83 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.flink.ml.linalg.typeinfo; + +import org.apache.flink.api.common.ExecutionConfig; +import org.apache.flink.api.common.typeinfo.TypeInformation; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.ml.linalg.VectorWithNorm; + +/** A {@link TypeInformation} for the {@link VectorWithNorm} type. */ +public class VectorWithNormTypeInfo extends TypeInformation<VectorWithNorm> { + @Override + public boolean isBasicType() { + return false; + } + + @Override + public boolean isTupleType() { + return false; + } + + @Override + public int getArity() { + return 2; + } + + @Override + public int getTotalFields() { + return 2; + } + + @Override + public Class<VectorWithNorm> getTypeClass() { + return VectorWithNorm.class; + } + + @Override + public boolean isKeyType() { + return false; + } + + @Override + public TypeSerializer<VectorWithNorm> createSerializer(ExecutionConfig executionConfig) { + return new VectorWithNormSerializer(); + } + + @Override + public String toString() { + return "DenseVectorWithNormType"; Review Comment: DenseVectorWithNormType -> VectorWithNormTypeInfo ########## flink-ml-core/src/main/java/org/apache/flink/ml/linalg/VectorWithNorm.java: ########## @@ -0,0 +1,48 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.flink.ml.linalg; + +import org.apache.flink.api.common.typeinfo.TypeInfo; +import org.apache.flink.ml.linalg.typeinfo.VectorWithNormTypeInfoFactory; + +/** A vector with its norm. */ +@TypeInfo(VectorWithNormTypeInfoFactory.class) +public class VectorWithNorm { + private final Vector vector; + + public double l2Norm; + + public VectorWithNorm(Vector vector) { + this(vector, BLAS.norm2(vector)); + } + + public VectorWithNorm(Vector vector, double l2Norm) { + this.vector = vector; + this.l2Norm = l2Norm; + } + + public Vector getVector() { Review Comment: It seems simpler to make both `vector` and `l2Norm` public final and remove the accessor methods. ########## flink-ml-core/src/main/java/org/apache/flink/ml/linalg/typeinfo/VectorWithNormSerializer.java: ########## @@ -0,0 +1,189 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ + +package org.apache.flink.ml.linalg.typeinfo; + +import org.apache.flink.api.common.typeutils.SimpleTypeSerializerSnapshot; +import org.apache.flink.api.common.typeutils.TypeSerializer; +import org.apache.flink.api.common.typeutils.TypeSerializerSnapshot; +import org.apache.flink.core.memory.DataInputView; +import org.apache.flink.core.memory.DataOutputView; +import org.apache.flink.ml.linalg.DenseVector; +import org.apache.flink.ml.linalg.SparseVector; +import org.apache.flink.ml.linalg.Vector; +import org.apache.flink.ml.linalg.VectorWithNorm; + +import java.io.IOException; +import java.io.UnsupportedEncodingException; + +/** Specialized serializer for {@link VectorWithNorm}. */ +public class VectorWithNormSerializer extends TypeSerializer<VectorWithNorm> { + private final DenseVectorSerializer denseVectorSerializer = new DenseVectorSerializer(); Review Comment: Could we re-use `VectorSerializer` instead of dealing with `DenseVectorSerializer` and `SparseVectorSerializer` in this class? -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
