Hello, I have some code trying to compare linear regression coefficients with three sets of features, as shown below. On the third one, I get an assertion error.
This is the code, object MultipleRegression extends App { val spark = SparkSession.builder().appName("Regression Model Builder").master("local").getOrCreate() import spark.implicits._ val training = build("kc_house_train_data.csv", "train", spark) val test = build("kc_house_test_data.csv", "test", spark) val lr = new LinearRegression() val m1 = lr.fit(training.map(r => buildLp(r, "sqft_living", "bedrooms", "bathrooms", "lat", "long"))) println(s"Coefficients: ${m1.coefficients}, Intercept: ${m1.intercept}") val m2 = lr.fit(training.map(r => buildLp(r, "sqft_living", "bedrooms", "bathrooms", "lat", "long", "bed_bath_rooms"))) println(s"Coefficients: ${m2.coefficients}, Intercept: ${m2.intercept}") val m3 = lr.fit(training.map(r => buildLp(r, "sqft_living", "bedrooms", "bathrooms", "lat", "long", "bed_bath_rooms", "bedrooms_squared", "log_sqft_living", "lat_plus_long"))) println(s"Coefficients: ${m3.coefficients}, Intercept: ${m3.intercept}") def build(path: String, view: String, spark: SparkSession) = { val toDouble = udf((x: String) => x.toDouble) val product = udf((x: Double, y: Double) => x * y) val sum = udf((x: Double, y: Double) => x + y) val log = udf((x: Double) => scala.math.log(x)) spark.read. option("header", "true"). csv(path). withColumn("sqft_living", toDouble('sqft_living)). withColumn("price", toDouble('price)). withColumn("bedrooms", toDouble('bedrooms)). withColumn("bathrooms", toDouble('bathrooms)). withColumn("lat", toDouble('lat)). withColumn("long", toDouble('long)). withColumn("bedrooms_squared", product('bedrooms, 'bedrooms)). withColumn("bed_bath_rooms", product('bedrooms, 'bathrooms)). withColumn("lat_plus_long", sum('lat, 'long)). withColumn("log_sqft_living", log('sqft_living)) } def buildLp(r: Row, input: String*) = { var features = input.map(r.getAs[Double](_)).toArray new LabeledPoint(r.getAs[Double]("price"), Vectors.dense(features)) } } This is the error I get. Exception in thread "main" java.lang.AssertionError: assertion failed: lapack.dppsv returned 9. at scala.Predef$.assert(Predef.scala:170) at org.apache.spark.mllib.linalg.CholeskyDecomposition$.solve(CholeskyDecomposition.scala:40) at org.apache.spark.ml.optim.WeightedLeastSquares.fit(WeightedLeastSquares.scala:140) at org.apache.spark.ml.regression.LinearRegression.train(LinearRegression.scala:180) at org.apache.spark.ml.regression.LinearRegression.train(LinearRegression.scala:70) at org.apache.spark.ml.Predictor.fit(Predictor.scala:90) at com.ss.ml.regression.MultipleRegression$.delayedEndpoint$com$ss$ml$regression$MultipleRegression$1(MultipleRegression.scala:36) at com.ss.ml.regression.MultipleRegression$delayedInit$body.apply(MultipleRegression.scala:12) at scala.Function0$class.apply$mcV$sp(Function0.scala:34) at scala.runtime.AbstractFunction0.apply$mcV$sp(AbstractFunction0.scala:12) at scala.App$$anonfun$main$1.apply(App.scala:76) at scala.App$$anonfun$main$1.apply(App.scala:76) at scala.collection.immutable.List.foreach(List.scala:381) at scala.collection.generic.TraversableForwarder$class.foreach(TraversableForwarder.scala:35) at scala.App$class.main(App.scala:76) at com.ss.ml.regression.MultipleRegression$.main(MultipleRegression.scala:12) at com.ss.ml.regression.MultipleRegression.main(MultipleRegression.scala) at sun.reflect.NativeMethodAccessorImpl.invoke0(Native Method) at sun.reflect.NativeMethodAccessorImpl.invoke(NativeMethodAccessorImpl.java:62) at sun.reflect.DelegatingMethodAccessorImpl.invoke(DelegatingMethodAccessorImpl.java:43) at java.lang.reflect.Method.invoke(Method.java:483) at com.intellij.rt.execution.application.AppMain.main(AppMain.java:140) Does anyone know what is going wrong here? Many thanks -- *Meeraj Kunnumpurath* *Director and Executive PrincipalService Symphony Ltd00 44 7702 693597* *00 971 50 409 0169mee...@servicesymphony.com <mee...@servicesymphony.com>*