[
https://issues.apache.org/jira/browse/MAHOUT-1962?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=15995519#comment-15995519
]
ASF GitHub Bot commented on MAHOUT-1962:
----------------------------------------
Github user rawkintrevo commented on a diff in the pull request:
https://github.com/apache/mahout/pull/300#discussion_r114637027
--- Diff:
math-scala/src/main/scala/org/apache/mahout/math/algorithms/regression/LinearRegressorModel.scala
---
@@ -54,46 +58,72 @@ trait LinearRegressorFitter[K] extends
RegressorFitter[K] {
addIntercept = hyperparameters.asInstanceOf[Map[Symbol,
Boolean]].getOrElse('addIntercept, true)
}
+
def calculateStandardError[M[K] <: LinearRegressorModel[K]](X:
DrmLike[K],
drmTarget: DrmLike[K],
drmXtXinv: Matrix,
model: M[K]): M[K] = {
import org.apache.mahout.math.function.Functions.SQRT
import org.apache.mahout.math.scalabindings.MahoutCollections._
- var modelOut = model
+
val yhat = X %*% model.beta
val residuals = drmTarget - yhat
- val ete = (residuals.t %*% residuals).collect // 1x1
+
+ // Setting modelOut.rss
+ // Changed name from ete, to rssModel. This is residual sum of
squares for model of yhat vs y
+ var modelOut = calculateResidualSumOfSquares(model,residuals)
+
val n = drmTarget.nrow
val k = safeToNonNegInt(X.ncol)
val invDegFreedomKindOf = 1.0 / (n - k)
- val varCovarMatrix = invDegFreedomKindOf * ete(0,0) * drmXtXinv
+ val varCovarMatrix = invDegFreedomKindOf * modelOut.rss * drmXtXinv
val se = varCovarMatrix.viewDiagonal.assign(SQRT)
val tScore = model.beta / se
- val tDist = new
org.apache.commons.math3.distribution.TDistribution(n-k)
+ val tDist = new TDistribution(n-k)
+
val pval = dvec(tScore.toArray.map(t => 2 * (1.0 -
tDist.cumulativeProbability(Math.abs(t))) ))
+
// ^^ TODO bug in this calculation- fix and add test
//degreesFreedom = k
-
-
modelOut.se = se
modelOut.tScore = tScore
modelOut.pval = pval
- modelOut.degreesFreedom = X.ncol
- modelOut.summary = generateSummaryString(modelOut)
+ // for degrees of freedom, dont count the intercept term that was added
+ modelOut.degreesFreedom = X.ncol - 1
+
+ modelOut.trainingExamples = n.toInt
+
if (calcCommonStatistics){
- modelOut = calculateCommonStatistics(modelOut, drmTarget, residuals)
+ modelOut = calculateCommonStatistics(modelOut, X, drmTarget,
residuals)
--- End diff --
OK- you changed this to take `X` but the only thing you use X for is to
calculate the d.o.f. and `model.trainingExamples`, which are already included in
the model (you just added them). I would refactor this so X isn't a parameter.
> Add F-test to Linear Regression Fitness Tests
> ----------------------------------------------
>
> Key: MAHOUT-1962
> URL: https://issues.apache.org/jira/browse/MAHOUT-1962
> Project: Mahout
> Issue Type: Improvement
> Components: Algorithms
> Affects Versions: 0.12.0, 0.12.1, 0.13.0, 0.12.2
> Reporter: Dustin VanStee
> Priority: Minor
> Labels: beginner
> Fix For: 0.13.1
>
> Original Estimate: 96h
> Remaining Estimate: 96h
>
> This update will modify
> org.apache.mahout.math.algorithms.regression.tests.FittnessTests.scala and
> add an overall F-test for the significance of one or more parameters being not
> equal to zero.
--
This message was sent by Atlassian JIRA
(v6.3.15#6346)