The attachment seems to have been dropped, so I'm pasting the code below. Sorry for that Liviu
On Wed, Aug 24, 2011 at 1:44 PM, Liviu Andronic <landronim...@gmail.com> wrote: >> Second, penalty in ols() is not the same as the ridge constant in lm.ridge, >> but rather a penalty on the log likelihood. The documentation >> for ols refers to ?lrm for the description. >> > I see. At the same time ?rms::ols contains: "The penalized maximum > likelihood estimate (penalized least squares or ridge estimate) of > beta is (X'X + P)^{-1} X'Y.", while ?rms::lrm defines P as "penalty > \times diag(pf) \times penalty.matrix \times diag(pf), where pf is the > vector of square roots of penalty factors computed from penalty". This > is suspiciously similar to the definition of the "classical" Ridge > Regression beta estimates (X'X + kI)^{-1} X'Y, where k is the lambda > constant and I the identity matrix. > > Hence I was wondering if rms::ols could be forced to use 'P = kI', and > thus compute the "classical" Ridge estimates? I tried to hack rms::ols > to ensure that it passed 'kI' to lm.pfit(..., penalty.matrix = ...), > but I get strange results so I must be missing something. (See > attached.) > ols.ridge <- function (formula, data, weights, subset, na.action = na.delete, method = "qr", model = FALSE, x = FALSE, y = FALSE, se.fit = FALSE, linear.predictors = TRUE, penalty = 0, penalty.matrix, tol = 1e-07, sigma = NULL, var.penalty = c("simple", "sandwich"), ridge=TRUE, ...) { call <- match.call() var.penalty <- match.arg(var.penalty) m <- match.call(expand = FALSE) mc <- match(c("formula", "data", "subset", "weights", "na.action"), names(m), 0) m <- m[c(1, mc)] m$na.action <- na.action m$drop.unused.levels <- TRUE m[[1]] <- as.name("model.frame") if(ridge) lambda <- penalty if (length(attr(terms(formula), "term.labels"))) { dul <- .Options$drop.unused.levels if (!length(dul) || dul) { on.exit(options(drop.unused.levels = dul)) options(drop.unused.levels = FALSE) } X <- Design(eval.parent(m)) options(drop.unused.levels = dul) atrx <- attributes(X) atr <- atrx$Design nact <- atrx$na.action Terms <- atrx$terms assig <- DesignAssign(atr, 1, Terms) penpres <- FALSE if (!missing(penalty) && any(unlist(penalty) != 0)) penpres <- TRUE if (!missing(penalty.matrix) && any(penalty.matrix != 0)) penpres <- TRUE if (penpres && missing(var.penalty)) warning("default for var.penalty has changed to \"simple\"") if (method == "model.frame") return(X) scale <- as.character(formula[2]) attr(Terms, "formula") <- formula weights <- model.extract(X, "weights") if (length(weights) && penpres) stop("may not specify penalty with weights") Y <- model.extract(X, "response") n <- length(Y) if (model) m <- X X <- model.matrix(Terms, X) if (length(atr$colnames)) dimnames(X)[[2]] <- c("Intercept", atr$colnames) else dimnames(X)[[2]] <- c("Intercept", dimnames(X)[[2]][-1]) if (method == "model.matrix") return(X) } else { if (length(weights)) stop("weights not implemented when no covariables are present") assig <- NULL yy <- attr(terms(formula), "variables")[1] Y <- eval(yy, sys.parent(2)) nmiss <- sum(is.na(Y)) if (nmiss == 0) nmiss <- NULL else names(nmiss) <- as.character(yy) Y <- Y[!is.na(Y)] yest <- mean(Y) coef <- yest n <- length(Y) if (!length(sigma)) sigma <- sqrt(sum((Y - yest)^2)/(n - 1)) cov <- matrix(sigma * sigma/n, nrow = 1, ncol = 1, dimnames = list("Intercept", "Intercept")) fit <- list(coefficients = coef, var = cov, non.slopes = 1, fail = FALSE, residuals = Y - yest, df.residual = n - 1, intercept = TRUE) if (linear.predictors) { fit$linear.predictors <- rep(yest, n) names(fit$linear.predictors) <- names(Y) } if (model) fit$model <- m if (x) fit$x <- matrix(1, ncol = 1, nrow = n, dimnames = list(NULL, "Intercept")) if (y) fit$y <- Y fit$fitFunction <- c("ols", "lm") oldClass(fit) <- c("ols", "rms", "lm") return(fit) } if (!penpres) { fit <- if (length(weights)) lm.wfit(X, Y, weights, method = method, ...) else lm.fit(X, Y, method = method, ...) cov.unscaled <- chol2inv(fit$qr$qr) r <- fit$residuals yhat <- Y - r if (length(weights)) { sse <- sum(weights * r^2) m <- sum(weights * yhat/sum(weights)) ssr <- sum(weights * (yhat - m)^2) r2 <- ssr/(ssr + sse) if (!length(sigma)) sigma <- sqrt(sse/fit$df.residual) } else { sse <- sum(fit$residuals^2) if (!length(sigma)) sigma <- sqrt(sse/fit$df.residual) r2 <- 1 - sse/sum((Y - mean(Y))^2) } fit$var <- sigma * sigma * cov.unscaled cnam <- dimnames(X)[[2]] dimnames(fit$var) <- list(cnam, cnam) fit$stats <- c(n = n, `Model L.R.` = -n * logb(1 - r2), d.f. = length(fit$coef) - 1, R2 = r2, g = GiniMd(yhat), Sigma = sigma) } else { p <- length(atr$colnames) if (missing(penalty.matrix)) penalty.matrix <- Penalty.matrix(atr, X) if (nrow(penalty.matrix) != p || ncol(penalty.matrix) != p) stop("penalty matrix does not have", p, "rows and columns") psetup <- Penalty.setup(atr, penalty) penalty <- psetup$penalty multiplier <- psetup$multiplier if (length(multiplier) == 1) penalty.matrix <- multiplier * penalty.matrix else { a <- diag(sqrt(multiplier)) penalty.matrix <- a %*% penalty.matrix %*% a } if(ridge) { stopifnot(length(lambda)==1) penalty.matrix <- lambda * diag(p) if (nrow(penalty.matrix) != p || ncol(penalty.matrix) != p) stop("(ridge) penalty matrix does not have", p, "rows and columns") } fit <- lm.pfit(X, Y, penalty.matrix = penalty.matrix, tol = tol, var.penalty = var.penalty) if(!ridge) fit$penalty <- penalty else fit$penalty <-lambda } if (model) fit$model <- m if (linear.predictors) { fit$linear.predictors <- Y - fit$residuals names(fit$linear.predictors) <- names(Y) } if (x) fit$x <- X if (y) fit$y <- Y if (se.fit) { se <- drop((((X %*% fit$var) * X) %*% rep(1, ncol(X)))^0.5) names(se) <- names(Y) fit$se.fit <- se } fit <- c(fit, list(call = call, terms = Terms, Design = atr, non.slopes = 1, na.action = nact, scale.pred = scale, fail = FALSE, fitFunction = c("ols", "lm"))) fit$assign <- assig oldClass(fit) <- c("ols", "rms", "lm") fit } ______________________________________________ R-help@r-project.org mailing list https://stat.ethz.ch/mailman/listinfo/r-help PLEASE do read the posting guide http://www.R-project.org/posting-guide.html and provide commented, minimal, self-contained, reproducible code.