Repository: flink Updated Branches: refs/heads/master 950b79c59 -> 90c0142ef
[ml] Fixes implicit issue with BreezeVectorConverter Project: http://git-wip-us.apache.org/repos/asf/flink/repo Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/90c0142e Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/90c0142e Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/90c0142e Branch: refs/heads/master Commit: 90c0142ef21ae329acc843558e7ba6bc160df8d8 Parents: 950b79c Author: Till Rohrmann <trohrm...@apache.org> Authored: Tue Jun 2 13:22:47 2015 +0200 Committer: Till Rohrmann <trohrm...@apache.org> Committed: Tue Jun 2 17:02:26 2015 +0200 ---------------------------------------------------------------------- .../flink/ml/math/BreezeVectorConverter.scala | 47 -------------------- .../org/apache/flink/ml/math/DenseVector.scala | 15 +++++++ .../org/apache/flink/ml/math/SparseVector.scala | 22 +++++++++ .../scala/org/apache/flink/ml/math/Vector.scala | 23 ++++++++++ .../flink/ml/preprocessing/StandardScaler.scala | 2 +- .../apache/flink/ml/math/BreezeMathSuite.scala | 30 +++++++++++++ 6 files changed, 91 insertions(+), 48 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala index f5f7469..0bb24f3 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/BreezeVectorConverter.scala @@ -18,8 +18,6 @@ package org.apache.flink.ml.math -import breeze.linalg.{SparseVector => BreezeSparseVector} -import breeze.linalg.{DenseVector => BreezeDenseVector} import breeze.linalg.{Vector => BreezeVector} /** Type class which allows the conversion from Breeze vectors to Flink vectors @@ -34,48 +32,3 @@ trait BreezeVectorConverter[T <: Vector] extends Serializable { */ def convert(vector: BreezeVector[Double]): T } - -object BreezeVectorConverter{ - - /** Type class implementation for [[org.apache.flink.ml.math.DenseVector]] */ - implicit val denseVectorConverter = new BreezeVectorConverter[DenseVector] { - override def convert(vector: BreezeVector[Double]): DenseVector = { - vector match { - case dense: BreezeDenseVector[Double] => new DenseVector(dense.data) - case sparse: BreezeSparseVector[Double] => new DenseVector(sparse.toDenseVector.data) - } - } - } - - /** Type class implementation for [[org.apache.flink.ml.math.SparseVector]] */ - implicit val sparseVectorConverter = new BreezeVectorConverter[SparseVector] { - override def convert(vector: BreezeVector[Double]): SparseVector = { - vector match { - case dense: BreezeDenseVector[Double] => - SparseVector.fromCOO( - dense.length, - dense.iterator.toIterable) - case sparse: BreezeSparseVector[Double] => - new SparseVector( - sparse.used, - sparse.index.take(sparse.used), - sparse.data.take(sparse.used)) - } - } - } - - /** Type class implementation for [[Vector]] */ - implicit val vectorConverter = new BreezeVectorConverter[Vector] { - override def convert(vector: BreezeVector[Double]): Vector = { - vector match { - case dense: BreezeDenseVector[Double] => new DenseVector(dense.data) - - case sparse: BreezeSparseVector[Double] => - new SparseVector( - sparse.used, - sparse.index.take(sparse.used), - sparse.data.take(sparse.used)) - } - } - } -} http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala index 079e4bc..f242496 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala @@ -18,6 +18,8 @@ package org.apache.flink.ml.math +import breeze.linalg.{SparseVector => BreezeSparseVector, DenseVector => BreezeDenseVector, Vector => BreezeVector} + /** * Dense vector implementation of [[Vector]]. The data is represented in a continuous array of * doubles. @@ -134,4 +136,17 @@ object DenseVector { def init(size: Int, value: Double): DenseVector = { new DenseVector(Array.fill(size)(value)) } + + /** BreezeVectorConverter implementation for [[org.apache.flink.ml.math.DenseVector]] + * + * This allows to convert Breeze vectors into [[DenseVector]]. + */ + implicit val denseVectorConverter = new BreezeVectorConverter[DenseVector] { + override def convert(vector: BreezeVector[Double]): DenseVector = { + vector match { + case dense: BreezeDenseVector[Double] => new DenseVector(dense.data) + case sparse: BreezeSparseVector[Double] => new DenseVector(sparse.toDenseVector.data) + } + } + } } http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index ddfa084..cc2a227 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -18,6 +18,8 @@ package org.apache.flink.ml.math +import breeze.linalg.{SparseVector => BreezeSparseVector, DenseVector => BreezeDenseVector, Vector => BreezeVector} + import scala.util.Sorting /** Sparse vector implementation storing the data in two arrays. One index contains the sorted @@ -226,4 +228,24 @@ object SparseVector { def fromCOO(size: Int, entry: (Int, Int)): SparseVector = { fromCOO(size, (entry._1, entry._2.toDouble)) } + + /** BreezeVectorConverter implementation for [[org.apache.flink.ml.math.SparseVector]] + * + * This allows to convert Breeze vectors into [[SparseVector]] + */ + implicit val sparseVectorConverter = new BreezeVectorConverter[SparseVector] { + override def convert(vector: BreezeVector[Double]): SparseVector = { + vector match { + case dense: BreezeDenseVector[Double] => + SparseVector.fromCOO( + dense.length, + dense.iterator.toIterable) + case sparse: BreezeSparseVector[Double] => + new SparseVector( + sparse.used, + sparse.index.take(sparse.used), + sparse.data.take(sparse.used)) + } + } + } } http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala index 0b1f0cd..ca87e5b 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/Vector.scala @@ -18,6 +18,9 @@ package org.apache.flink.ml.math +import breeze.linalg.{SparseVector => BreezeSparseVector, DenseVector => BreezeDenseVector, Vector => BreezeVector} +import org.apache.flink.ml.math.Vector + /** Base trait for Vectors * */ @@ -72,3 +75,23 @@ trait Vector extends Serializable { } } } + +object Vector{ + /** BreezeVectorConverter implementation for [[Vector]] + * + * This allows to convert Breeze vectors into [[Vector]]. + */ + implicit val vectorConverter = new BreezeVectorConverter[Vector] { + override def convert(vector: BreezeVector[Double]): Vector = { + vector match { + case dense: BreezeDenseVector[Double] => new DenseVector(dense.data) + + case sparse: BreezeSparseVector[Double] => + new SparseVector( + sparse.used, + sparse.index.take(sparse.used), + sparse.data.take(sparse.used)) + } + } + } +} http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala index 7992b02..3b9c8d2 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/preprocessing/StandardScaler.scala @@ -250,7 +250,7 @@ object StandardScaler { breezeVector -= broadcastMean breezeVector :/= broadcastStd breezeVector = (breezeVector :* std) + mean - LabeledVector(label, breezeVector.fromBreeze[Vector]) + LabeledVector(label, breezeVector.fromBreeze) } } } http://git-wip-us.apache.org/repos/asf/flink/blob/90c0142e/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala index b03f08f..0d230c5 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/math/BreezeMathSuite.scala @@ -19,6 +19,7 @@ package org.apache.flink.ml.math import Breeze._ +import breeze.linalg import org.scalatest.{Matchers, FlatSpec} @@ -65,4 +66,33 @@ class BreezeMathSuite extends FlatSpec with Matchers { result should equal(expectedMatrix) } + + it should "convert a dense Flink vector into a dense Breeze vector and vice versa" in { + val vector = DenseVector(1, 2, 3) + + val breezeVector = vector.asBreeze + + val flinkVector = breezeVector.fromBreeze + + breezeVector.getClass should be(new linalg.DenseVector[Double](0).getClass()) + flinkVector.getClass should be (classOf[DenseVector]) + + flinkVector should equal(vector) + } + + it should "convert a sparse Flink vector into a sparse Breeze vector and given the right " + + "converter back into a dense Flink vector" in { + implicit val converter = implicitly[BreezeVectorConverter[DenseVector]] + + val vector = SparseVector.fromCOO(3, (1, 1.0), (2, 2.0)) + + val breezeVector = vector.asBreeze + + val flinkVector = breezeVector.fromBreeze + + breezeVector.getClass should be(new linalg.SparseVector[Double](null).getClass()) + flinkVector.getClass should be (classOf[DenseVector]) + + flinkVector.equalsVector(vector) should be(true) + } }