Repository: flink Updated Branches: refs/heads/master cbc8423e2 -> 015af177d
[FLINK-1717] [ml] Adds support to directly read libSVM and SVMLight files This closes #543. Project: http://git-wip-us.apache.org/repos/asf/flink/repo Commit: http://git-wip-us.apache.org/repos/asf/flink/commit/015af177 Tree: http://git-wip-us.apache.org/repos/asf/flink/tree/015af177 Diff: http://git-wip-us.apache.org/repos/asf/flink/diff/015af177 Branch: refs/heads/master Commit: 015af177d1b54ea7bdf860a3a48e5a8f022d8304 Parents: cbc8423 Author: Till Rohrmann <trohrm...@apache.org> Authored: Sat Mar 28 18:31:02 2015 +0100 Committer: Till Rohrmann <trohrm...@apache.org> Committed: Wed Apr 1 16:42:20 2015 +0200 ---------------------------------------------------------------------- flink-staging/flink-ml/pom.xml | 8 + .../scala/org/apache/flink/ml/MLUtils.scala | 122 +++++++++++++ .../apache/flink/ml/common/LabeledVector.scala | 17 +- .../flink/ml/feature/PolynomialBase.scala | 2 +- .../org/apache/flink/ml/math/DenseMatrix.scala | 9 +- .../org/apache/flink/ml/math/DenseVector.scala | 2 +- .../org/apache/flink/ml/math/SparseMatrix.scala | 3 +- .../org/apache/flink/ml/math/SparseVector.scala | 8 +- .../scala/org/apache/flink/ml/package.scala | 46 +++++ .../regression/MultipleLinearRegression.scala | 2 +- .../org/apache/flink/ml/MLUtilsSuite.scala | 112 ++++++++++++ .../ml/feature/PolynomialBaseITSuite.scala | 12 +- .../flink/ml/regression/RegressionData.scala | 180 +++++++++---------- 13 files changed, 417 insertions(+), 106 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/pom.xml ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/pom.xml b/flink-staging/flink-ml/pom.xml index 899d266..91005f8 100644 --- a/flink-staging/flink-ml/pom.xml +++ b/flink-staging/flink-ml/pom.xml @@ -63,6 +63,14 @@ <dependency> <groupId>org.apache.flink</groupId> + <artifactId>flink-core</artifactId> + 
<version>${project.version}</version> + <type>test-jar</type> + <scope>test</scope> + </dependency> + + <dependency> + <groupId>org.apache.flink</groupId> <artifactId>flink-test-utils</artifactId> <version>${project.version}</version> <scope>test</scope> http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/MLUtils.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/MLUtils.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/MLUtils.scala new file mode 100644 index 0000000..a327ddc --- /dev/null +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/MLUtils.scala @@ -0,0 +1,122 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.ml + +import org.apache.flink.api.common.functions.RichMapFunction +import org.apache.flink.api.java.operators.DataSink +import org.apache.flink.api.scala._ +import org.apache.flink.configuration.Configuration +import org.apache.flink.ml.common.LabeledVector +import org.apache.flink.ml.math.SparseVector + +/** Convenience functions for machine learning tasks + * + * This object contains convenience functions for machine learning tasks: + * + * - readLibSVM: + * Reads a libSVM/SVMLight input file and returns a data set of [[LabeledVector]]. + * The file format is specified [http://svmlight.joachims.org/ here]. + * + * - writeLibSVM: + * Writes a data set of [[LabeledVector]] in libSVM/SVMLight format to disk. The file format + * is specified [http://svmlight.joachims.org/ here]. + */ +object MLUtils { + + val DIMENSION = "dimension" + + /** Reads a file in libSVM/SVMLight format and converts the data into a data set of + * [[LabeledVector]]. The dimension of the [[LabeledVector]] is determined automatically. + * + * Since the libSVM/SVMLight format stores a vector in its sparse form, the [[LabeledVector]] + * will also be instantiated with a [[SparseVector]]. 
+ * + * @param env [[ExecutionEnvironment]] + * @param filePath Path to the input file + * @return [[DataSet]] of [[LabeledVector]] containing the information of the libSVM/SVMLight + * file + */ + def readLibSVM(env: ExecutionEnvironment, filePath: String): DataSet[LabeledVector] = { + val labelCOODS = env.readTextFile(filePath).flatMap { + line => + // remove all comments which start with a '#' + val commentFreeLine = line.takeWhile(_ != '#').trim + + if(commentFreeLine.nonEmpty) { + val splits = commentFreeLine.split(' ') + val label = splits.head.toDouble + val sparseFeatures = splits.tail + val coos = sparseFeatures.map { + str => + val pair = str.split(':') + require(pair.length == 2, "Each feature entry has to have the form <feature>:<value>") + + // libSVM index is 1-based, but we expect it to be 0-based + val index = pair(0).toInt - 1 + val value = pair(1).toDouble + + (index, value) + } + + Some((label, coos)) + } else { + None + } + } + + // Calculate maximum dimension of vectors + val dimensionDS = labelCOODS.map { + labelCOO => + labelCOO._2.map( _._1 + 1 ).max + }.reduce(scala.math.max(_, _)) + + labelCOODS.map{ new RichMapFunction[(Double, Array[(Int, Double)]), LabeledVector] { + var dimension = 0 + + override def open(configuration: Configuration): Unit = { + dimension = getRuntimeContext.getBroadcastVariable(DIMENSION).get(0) + } + + override def map(value: (Double, Array[(Int, Double)])): LabeledVector = { + new LabeledVector(value._1, SparseVector.fromCOO(dimension, value._2)) + } + }}.withBroadcastSet(dimensionDS, DIMENSION) + } + + /** Writes a [[DataSet]] of [[LabeledVector]] to a file using the libSVM/SVMLight format. 
+ * + * @param filePath Path to output file + * @param labeledVectors [[DataSet]] of [[LabeledVector]] to write to disk + * @return + */ + def writeLibSVM(filePath: String, labeledVectors: DataSet[LabeledVector]): DataSink[String] = { + val stringRepresentation = labeledVectors.map{ + labeledVector => + val vectorStr = labeledVector.vector. + // remove zero entries + filter( _._2 != 0). + map{case (idx, value) => (idx + 1) + ":" + value}. + mkString(" ") + + labeledVector.label + " " + vectorStr + } + + stringRepresentation.writeAsText(filePath) + } +} http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala index f3d6172..4563724 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/common/LabeledVector.scala @@ -23,7 +23,20 @@ import org.apache.flink.ml.math.Vector /** This class represents a vector with an associated label as it is required for many supervised * learning tasks. 
* - * @param vector Data point * @param label Label of the data point + * @param vector Data point */ -case class LabeledVector(vector: Vector, label: Double) {} +case class LabeledVector(label: Double, vector: Vector) { + + override def equals(obj: Any): Boolean = { + obj match { + case labeledVector: LabeledVector => + vector.equals(labeledVector.vector) && label.equals(labeledVector.label) + case _ => false + } + } + + override def toString: String = { + s"LabeledVector($label, $vector)" + } +} http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala index 04f698e..61f477c 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/feature/PolynomialBase.scala @@ -75,7 +75,7 @@ class PolynomialBase extends Transformer[LabeledVector, LabeledVector] with Seri val transformedVector = calculatePolynomial(degree, vector) - LabeledVector(transformedVector, label) + LabeledVector(label, transformedVector) } } } http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala index 16291b8..fd490e1 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseMatrix.scala @@ -26,9 +26,12 @@ package 
org.apache.flink.ml.math * @param numCols Number of columns * @param data Array of matrix elements in column major order */ -case class DenseMatrix(val numRows: Int, - val numCols: Int, - val data: Array[Double]) extends Matrix { +case class DenseMatrix( + val numRows: Int, + val numCols: Int, + val data: Array[Double]) + extends Matrix + with Serializable{ import DenseMatrix._ http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala index 50992a9..ab657c5 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/DenseVector.scala @@ -24,7 +24,7 @@ package org.apache.flink.ml.math * * @param data Array of doubles to store the vector elements */ -case class DenseVector(val data: Array[Double]) extends Vector { +case class DenseVector(val data: Array[Double]) extends Vector with Serializable { /** * Number of elements in a vector http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala index b065630..c9842f8 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseMatrix.scala @@ -37,7 +37,8 @@ class SparseMatrix( val rowIndices: Array[Int], val colPtrs: Array[Int], val data: Array[Double]) - extends 
Matrix { + extends Matrix + with Serializable { /** Element wise access function * http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala index 9fa69cb..2c63203 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/math/SparseVector.scala @@ -27,7 +27,8 @@ class SparseVector( val size: Int, val indices: Array[Int], val data: Array[Double]) - extends Vector { + extends Vector + with Serializable { /** Updates the element at the given index with the provided value * * @param index @@ -77,6 +78,11 @@ class SparseVector( denseVector } + override def toString: String = { + val entries = indices.zip(data).mkString(", ") + "SparseVector(" + entries + ")" + } + private def locate(index: Int): Int = { require(0 <= index && index < size, index + " not in [0, " + size + ")") http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/package.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/package.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/package.scala new file mode 100644 index 0000000..250c8cb --- /dev/null +++ b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/package.scala @@ -0,0 +1,46 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. 
The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.flink + +import org.apache.flink.api.java.operators.DataSink +import org.apache.flink.api.scala.{DataSet, ExecutionEnvironment} +import org.apache.flink.ml.common.LabeledVector + +package object ml { + + /** Pimp my [[ExecutionEnvironment]] to directly support `readLibSVM` + * + * @param executionEnvironment + */ + implicit class RichExecutionEnvironment(executionEnvironment: ExecutionEnvironment) { + def readLibSVM(path: String): DataSet[LabeledVector] = { + MLUtils.readLibSVM(executionEnvironment, path) + } + } + + /** Pimp my [[DataSet]] to directly support `writeAsLibSVM` + * + * @param dataSet + */ + implicit class RichDataSet(dataSet: DataSet[LabeledVector]) { + def writeAsLibSVM(path: String): DataSink[String] = { + MLUtils.writeLibSVM(path, dataSet) + } + } +} http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala index 9768cce..076156d 100644 --- a/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala +++ 
b/flink-staging/flink-ml/src/main/scala/org/apache/flink/ml/regression/MultipleLinearRegression.scala @@ -441,7 +441,7 @@ Transformer[ Vector, LabeledVector ] { val prediction = dotProduct + weight0 - LabeledVector(value, prediction) + LabeledVector(prediction, value) } } } http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/MLUtilsSuite.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/MLUtilsSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/MLUtilsSuite.scala new file mode 100644 index 0000000..47682ce --- /dev/null +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/MLUtilsSuite.scala @@ -0,0 +1,112 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. 
+ */ + +package org.apache.flink.ml + +import java.io.File + +import scala.io.Source + +import org.scalatest.{FlatSpec, Matchers} + +import org.apache.flink.api.scala._ +import org.apache.flink.api.scala.ExecutionEnvironment +import org.apache.flink.ml.common.LabeledVector +import org.apache.flink.ml.math.SparseVector +import org.apache.flink.test.util.FlinkTestBase +import org.apache.flink.testutils.TestFileUtils + +class MLUtilsSuite extends FlatSpec with Matchers with FlinkTestBase { + + behavior of "The RichExecutionEnvironment" + + it should "read a libSVM/SVMLight input file" in { + val env = ExecutionEnvironment.getExecutionEnvironment + + val content = + """ + |1 2:10.0 4:4.5 8:4.2 # foo + |-1 1:9.0 4:-4.5 7:2.4 # bar + |0.4 3:1.0 8:-5.6 10:1.0 + |-42.1 2:2.0 4:-6.1 3:5.1 # svm + """.stripMargin + + val expectedLabeledVectors = Set( + LabeledVector(1, SparseVector.fromCOO(10, (1, 10), (3, 4.5), (7, 4.2))), + LabeledVector(-1, SparseVector.fromCOO(10, (0, 9), (3, -4.5), (6, 2.4))), + LabeledVector(0.4, SparseVector.fromCOO(10, (2, 1), (7, -5.6), (9, 1))), + LabeledVector(-42.1, SparseVector.fromCOO(10, (1, 2), (3, -6.1), (2, 5.1))) + ) + + val inputFilePath = TestFileUtils.createTempFile(content) + + val svmInput = env.readLibSVM(inputFilePath) + + val labeledVectors = svmInput.collect + + labeledVectors.size should be(expectedLabeledVectors.size) + + for(lVector <- labeledVectors) { + expectedLabeledVectors.contains(lVector) should be(true) + } + + } + + it should "write a libSVM/SVMLight output file" in { + val env = ExecutionEnvironment.getExecutionEnvironment + + val labeledVectors = Seq( + LabeledVector(1.0, SparseVector.fromCOO(10, (1, 10), (3, 4.5), (7, 4.2))), + LabeledVector(-1.0, SparseVector.fromCOO(10, (0, 9), (3, -4.5), (6, 2.4))), + LabeledVector(0.4, SparseVector.fromCOO(10, (2, 1), (7, -5.6), (9, 1))), + LabeledVector(-42.1, SparseVector.fromCOO(10, (1, 2), (3, -6.1), (2, 5.1))) + ) + + val expectedLines = List( + "1.0 2:10.0 4:4.5 8:4.2", + 
"-1.0 1:9.0 4:-4.5 7:2.4", + "0.4 3:1.0 8:-5.6 10:1.0", + "-42.1 2:2.0 3:5.1 4:-6.1" + ) + + val labeledVectorsDS = env.fromCollection(labeledVectors) + + val tempDir = new File(System.getProperty("java.io.tmpdir")); + + val tempFile = new File(tempDir, TestFileUtils.randomFileName()) + + val outputFilePath = tempFile.getAbsolutePath + + labeledVectorsDS.writeAsLibSVM(outputFilePath) + + env.execute() + + val src = Source.fromFile(tempFile) + + var counter = 0 + + for(l <- src.getLines()) { + expectedLines.exists(_.equals(l)) should be(true) + counter += 1 + } + + counter should be(expectedLines.size) + + tempFile.delete() + } +} http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala index 5529e17..0b321ff 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/feature/PolynomialBaseITSuite.scala @@ -39,8 +39,8 @@ class PolynomialBaseITSuite env.setParallelism (2) val input = Seq ( - LabeledVector (DenseVector (1), 1.0), - LabeledVector (DenseVector (2), 2.0) + LabeledVector (1.0, DenseVector (1)), + LabeledVector (2.0, DenseVector (2)) ) val inputDS = env.fromCollection (input) @@ -69,8 +69,8 @@ class PolynomialBaseITSuite env.setParallelism(2) val input = Seq( - LabeledVector(DenseVector(2, 3), 1.0), - LabeledVector(DenseVector(2, 3, 4), 2.0) + LabeledVector(1.0, DenseVector(2, 3)), + LabeledVector(2.0, DenseVector(2, 3, 4)) ) val expectedMap = List( @@ -100,8 +100,8 @@ class PolynomialBaseITSuite env.setParallelism(2) val input = Seq( - LabeledVector(DenseVector(2, 3), 1.0), - 
LabeledVector(DenseVector(2, 3, 4), 2.0) + LabeledVector(1.0, DenseVector(2, 3)), + LabeledVector(2.0, DenseVector(2, 3, 4)) ) val inputDS = env.fromCollection(input) http://git-wip-us.apache.org/repos/asf/flink/blob/015af177/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala ---------------------------------------------------------------------- diff --git a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala index 349f14f..82b4cc3 100644 --- a/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala +++ b/flink-staging/flink-ml/src/test/scala/org/apache/flink/ml/regression/RegressionData.scala @@ -28,46 +28,46 @@ object RegressionData { val expectedSquaredResidualSum: Double = 49.7596 val data: Seq[LabeledVector] = Seq( - LabeledVector(DenseVector(0.2714), 10.7949), - LabeledVector(DenseVector(0.1008), 10.6426), - LabeledVector(DenseVector(0.5078), 10.5603), - LabeledVector(DenseVector(0.5856), 12.8707), - LabeledVector(DenseVector(0.7629), 10.7026), - LabeledVector(DenseVector(0.0830), 9.8571), - LabeledVector(DenseVector(0.6616), 10.5001), - LabeledVector(DenseVector(0.5170), 11.2063), - LabeledVector(DenseVector(0.1710), 9.1892), - LabeledVector(DenseVector(0.9386), 12.2408), - LabeledVector(DenseVector(0.5905), 11.0307), - LabeledVector(DenseVector(0.4406), 10.1369), - LabeledVector(DenseVector(0.9419), 10.7609), - LabeledVector(DenseVector(0.6559), 12.5328), - LabeledVector(DenseVector(0.4519), 13.3560), - LabeledVector(DenseVector(0.8397), 14.7424), - LabeledVector(DenseVector(0.5326), 11.1057), - LabeledVector(DenseVector(0.5539), 11.6157), - LabeledVector(DenseVector(0.6801), 11.5744), - LabeledVector(DenseVector(0.3672), 11.1775), - LabeledVector(DenseVector(0.2393), 9.7991), - LabeledVector(DenseVector(0.5789), 9.8173), - LabeledVector(DenseVector(0.8669), 
12.5642), - LabeledVector(DenseVector(0.4068), 9.9952), - LabeledVector(DenseVector(0.1126), 8.4354), - LabeledVector(DenseVector(0.4438), 13.7058), - LabeledVector(DenseVector(0.3002), 10.6672), - LabeledVector(DenseVector(0.4014), 11.6080), - LabeledVector(DenseVector(0.8334), 13.6926), - LabeledVector(DenseVector(0.4036), 9.5261), - LabeledVector(DenseVector(0.3902), 11.5837), - LabeledVector(DenseVector(0.3604), 11.5831), - LabeledVector(DenseVector(0.1403), 10.5038), - LabeledVector(DenseVector(0.2601), 10.9382), - LabeledVector(DenseVector(0.0868), 9.7325), - LabeledVector(DenseVector(0.4294), 12.0113), - LabeledVector(DenseVector(0.2573), 9.9219), - LabeledVector(DenseVector(0.2976), 10.0963), - LabeledVector(DenseVector(0.4249), 11.9999), - LabeledVector(DenseVector(0.1192), 12.0442) + LabeledVector(10.7949, DenseVector(0.2714)), + LabeledVector(10.6426, DenseVector(0.1008)), + LabeledVector(10.5603, DenseVector(0.5078)), + LabeledVector(12.8707, DenseVector(0.5856)), + LabeledVector(10.7026, DenseVector(0.7629)), + LabeledVector(9.8571, DenseVector(0.0830)), + LabeledVector(10.5001, DenseVector(0.6616)), + LabeledVector(11.2063, DenseVector(0.5170)), + LabeledVector(9.1892, DenseVector(0.1710)), + LabeledVector(12.2408, DenseVector(0.9386)), + LabeledVector(11.0307, DenseVector(0.5905)), + LabeledVector(10.1369, DenseVector(0.4406)), + LabeledVector(10.7609, DenseVector(0.9419)), + LabeledVector(12.5328, DenseVector(0.6559)), + LabeledVector(13.3560, DenseVector(0.4519)), + LabeledVector(14.7424, DenseVector(0.8397)), + LabeledVector(11.1057, DenseVector(0.5326)), + LabeledVector(11.6157, DenseVector(0.5539)), + LabeledVector(11.5744, DenseVector(0.6801)), + LabeledVector(11.1775, DenseVector(0.3672)), + LabeledVector(9.7991, DenseVector(0.2393)), + LabeledVector(9.8173, DenseVector(0.5789)), + LabeledVector(12.5642, DenseVector(0.8669)), + LabeledVector(9.9952, DenseVector(0.4068)), + LabeledVector(8.4354, DenseVector(0.1126)), + LabeledVector(13.7058, 
DenseVector(0.4438)), + LabeledVector(10.6672, DenseVector(0.3002)), + LabeledVector(11.6080, DenseVector(0.4014)), + LabeledVector(13.6926, DenseVector(0.8334)), + LabeledVector(9.5261, DenseVector(0.4036)), + LabeledVector(11.5837, DenseVector(0.3902)), + LabeledVector(11.5831, DenseVector(0.3604)), + LabeledVector(10.5038, DenseVector(0.1403)), + LabeledVector(10.9382, DenseVector(0.2601)), + LabeledVector(9.7325, DenseVector(0.0868)), + LabeledVector(12.0113, DenseVector(0.4294)), + LabeledVector(9.9219, DenseVector(0.2573)), + LabeledVector(10.0963, DenseVector(0.2976)), + LabeledVector(11.9999, DenseVector(0.4249)), + LabeledVector(12.0442, DenseVector(0.1192)) ) val expectedPolynomialWeights = Seq(0.2375, -0.3493, -0.1674) @@ -75,55 +75,55 @@ object RegressionData { val expectedPolynomialSquaredResidualSum = 1.5389e+03 val polynomialData: Seq[LabeledVector] = Seq( - LabeledVector(DenseVector(3.6663), 2.1415), - LabeledVector(DenseVector(4.0761), 10.9835), - LabeledVector(DenseVector(0.5714), 7.2507), - LabeledVector(DenseVector(4.1102), 11.9274), - LabeledVector(DenseVector(2.8456), -4.2798), - LabeledVector(DenseVector(0.4389), 7.1929), - LabeledVector(DenseVector(1.2532), 4.5097), - LabeledVector(DenseVector(2.4610), -3.6059), - LabeledVector(DenseVector(4.3088), 18.1132), - LabeledVector(DenseVector(4.3420), 19.2674), - LabeledVector(DenseVector(0.7093), 7.0664), - LabeledVector(DenseVector(4.3677), 20.1836), - LabeledVector(DenseVector(4.3073), 18.0609), - LabeledVector(DenseVector(2.1842), -2.2090), - LabeledVector(DenseVector(3.6013), 1.1306), - LabeledVector(DenseVector(0.6385), 7.1903), - LabeledVector(DenseVector(1.8979), -0.2668), - LabeledVector(DenseVector(4.1208), 12.2281), - LabeledVector(DenseVector(3.5649), 0.6086), - LabeledVector(DenseVector(4.3177), 18.4202), - LabeledVector(DenseVector(2.9508), -4.1284), - LabeledVector(DenseVector(0.1607), 6.1964), - LabeledVector(DenseVector(3.8211), 4.9638), - LabeledVector(DenseVector(4.2030), 
14.6677), - LabeledVector(DenseVector(3.0543), -3.8132), - LabeledVector(DenseVector(3.4098), -1.2891), - LabeledVector(DenseVector(3.3441), -1.9390), - LabeledVector(DenseVector(1.7650), 0.7293), - LabeledVector(DenseVector(2.9497), -4.1310), - LabeledVector(DenseVector(0.7703), 6.9131), - LabeledVector(DenseVector(3.1772), -3.2060), - LabeledVector(DenseVector(0.1432), 6.0899), - LabeledVector(DenseVector(1.2462), 4.5567), - LabeledVector(DenseVector(0.2078), 6.4562), - LabeledVector(DenseVector(0.4371), 7.1903), - LabeledVector(DenseVector(3.7056), 2.8017), - LabeledVector(DenseVector(3.1267), -3.4873), - LabeledVector(DenseVector(1.4269), 3.2918), - LabeledVector(DenseVector(4.2760), 17.0085), - LabeledVector(DenseVector(0.1550), 6.1622), - LabeledVector(DenseVector(1.9743), -0.8192), - LabeledVector(DenseVector(1.7170), 1.0957), - LabeledVector(DenseVector(3.4448), -0.9065), - LabeledVector(DenseVector(3.5784), 0.7986), - LabeledVector(DenseVector(0.8409), 6.6861), - LabeledVector(DenseVector(2.2039), -2.3274), - LabeledVector(DenseVector(2.0051), -1.0359), - LabeledVector(DenseVector(2.9084), -4.2092), - LabeledVector(DenseVector(3.1921), -3.1140), - LabeledVector(DenseVector(3.3961), -1.4323) + LabeledVector(2.1415, DenseVector(3.6663)), + LabeledVector(10.9835, DenseVector(4.0761)), + LabeledVector(7.2507, DenseVector(0.5714)), + LabeledVector(11.9274, DenseVector(4.1102)), + LabeledVector(-4.2798, DenseVector(2.8456)), + LabeledVector(7.1929, DenseVector(0.4389)), + LabeledVector(4.5097, DenseVector(1.2532)), + LabeledVector(-3.6059, DenseVector(2.4610)), + LabeledVector(18.1132, DenseVector(4.3088)), + LabeledVector(19.2674, DenseVector(4.3420)), + LabeledVector(7.0664, DenseVector(0.7093)), + LabeledVector(20.1836, DenseVector(4.3677)), + LabeledVector(18.0609, DenseVector(4.3073)), + LabeledVector(-2.2090, DenseVector(2.1842)), + LabeledVector(1.1306, DenseVector(3.6013)), + LabeledVector(7.1903, DenseVector(0.6385)), + LabeledVector(-0.2668, 
DenseVector(1.8979)), + LabeledVector(12.2281, DenseVector(4.1208)), + LabeledVector(0.6086, DenseVector(3.5649)), + LabeledVector(18.4202, DenseVector(4.3177)), + LabeledVector(-4.1284, DenseVector(2.9508)), + LabeledVector(6.1964, DenseVector(0.1607)), + LabeledVector(4.9638, DenseVector(3.8211)), + LabeledVector(14.6677, DenseVector(4.2030)), + LabeledVector(-3.8132, DenseVector(3.0543)), + LabeledVector(-1.2891, DenseVector(3.4098)), + LabeledVector(-1.9390, DenseVector(3.3441)), + LabeledVector(0.7293, DenseVector(1.7650)), + LabeledVector(-4.1310, DenseVector(2.9497)), + LabeledVector(6.9131, DenseVector(0.7703)), + LabeledVector(-3.2060, DenseVector(3.1772)), + LabeledVector(6.0899, DenseVector(0.1432)), + LabeledVector(4.5567, DenseVector(1.2462)), + LabeledVector(6.4562, DenseVector(0.2078)), + LabeledVector(7.1903, DenseVector(0.4371)), + LabeledVector(2.8017, DenseVector(3.7056)), + LabeledVector(-3.4873, DenseVector(3.1267)), + LabeledVector(3.2918, DenseVector(1.4269)), + LabeledVector(17.0085, DenseVector(4.2760)), + LabeledVector(6.1622, DenseVector(0.1550)), + LabeledVector(-0.8192, DenseVector(1.9743)), + LabeledVector(1.0957, DenseVector(1.7170)), + LabeledVector(-0.9065, DenseVector(3.4448)), + LabeledVector(0.7986, DenseVector(3.5784)), + LabeledVector(6.6861, DenseVector(0.8409)), + LabeledVector(-2.3274, DenseVector(2.2039)), + LabeledVector(-1.0359, DenseVector(2.0051)), + LabeledVector(-4.2092, DenseVector(2.9084)), + LabeledVector(-3.1140, DenseVector(3.1921)), + LabeledVector(-1.4323, DenseVector(3.3961)) ) }