A recent stackoverflow post <http://stackoverflow.com/questions/31134985> "How do you make R poly() evaluate (or “predict”) multivariate new data (orthogonal or raw)?" made me look at poly and polym again.

predict.poly doesn't work with multivariate data because for such data poly calls polym which does not:
a) return an object inheriting from class poly;
b) return the coefficients needed to make predictions;
c) accept coefficients as an argument or include code to make predictions.

This does lead to some wrong answers without warnings. e.g.
################## vanilla poly and polym ###########
library(datasets)
alm <- lm(stack.loss ~ poly(Air.Flow, Water.Temp, degree=3), stackloss)
# "correct" prediction values [1:10]
alm$fitted.values[1:10]
# predict(alldata)[1:10] gives correct values
predict(alm, stackloss)[1:10]
# predict(partdata) gives wrong values
predict(alm, stackloss[1:10,])
#########
I guess - but haven't confirmed - that with multivariate newdata predict.lm(alm, newdata) calculates new orthogonal polynomials based on newdata rather than applying the original coefficients.

Below I append versions of:
a) polym edited to address the three points above;
b) poly slightly edited to reflect the changes in polym;
c) predict.poly unaltered, just to get it in the same environment as polym and poly for testing.
After implementing these the three sets of predictions above all agree.

I'm very ready to believe that I've got the wrong end of the stick and/or my suggestions can be improved so I welcome correction.

Otherwise, how do I go about getting these changes implemented?
I see stats is maintained by R Core Team. Are they likely to pick it up from here, or do I need to take any other action?

Best regards

Keith Jewell

### polym ##############################################
polym <- function (..., degree = 1, coefs = NULL, raw = FALSE)
# add coefs argument
{
  if(is.null(coefs)) {
    dots <- list(...)
    nd <- length(dots)
    if (nd == 0)
      stop("must supply one or more vectors")
    if (nd == 1)
      return(poly(dots[[1L]], degree, raw = raw))
    n <- sapply(dots, length)
    if (any(n != n[1L]))
      stop("arguments must have the same length")
    z <- do.call("expand.grid", rep.int(list(0:degree), nd))
    s <- rowSums(z)
    ind <- (s > 0) & (s <= degree)
    z <- z[ind, ]
    s <- s[ind]
    aPoly <- poly(dots[[1L]], degree, raw = raw) # avoid 2 calcs
    res <- cbind(1, aPoly)[, 1 + z[, 1]]
# attribute "coefs" = list of coefs from individual variables
    if (!raw) coefs <- list(attr(aPoly, "coefs"))
    for (i in 2:nd) {
      aPoly <- poly(dots[[i]], degree, raw = raw)
      res <- res * cbind(1, aPoly)[, 1 + z[, i]]
      if (!raw) coefs <- c(coefs, list(attr(aPoly, "coefs")))
    }
    colnames(res) <- apply(z, 1L, function(x) paste(x, collapse = "."))
    attr(res, "degree") <- as.vector(s)
    if (!raw) attr(res, "coefs") <- coefs
    class(res) <- c("poly", "matrix") # add poly class
    res
  }
  else
  {
    nd <- length(coefs)    # number of variables
    newdata <- as.data.frame(list(...)) # new data
    if (nd != ncol(newdata)) stop("wrong number of columns in newdata")
    z <- do.call("expand.grid", rep.int(list(0:degree), nd))
    s <- rowSums(z)
    ind <- (s > 0) & (s <= degree)
    z <- z[ind, ]
res <- cbind(1, poly(newdata[[1]], degree=degree, coefs=coefs[[1]]))[, 1 + z[, 1]] for (i in 2:nd) res <- res*cbind(1, poly(newdata[[i]], degree=degree, coefs=coefs[[i]]))[, 1 + z[, i]]
    colnames(res) <- apply(z, 1L, function(x) paste(x, collapse = "."))
    res
  }
}
######################

#### poly ##################
poly <- function (x, ..., degree = 1, coefs = NULL, raw = FALSE)
{
  dots <- list(...)
  if (nd <- length(dots)) {
    if (nd == 1 && length(dots[[1L]]) == 1L)
      degree <- dots[[1L]]
# pass coefs argument as well
    else return(polym(x, ..., degree = degree, coefs=coefs, raw = raw))
  }
  if (is.matrix(x)) {
    m <- unclass(as.data.frame(cbind(x, ...)))
# pass coefs argument as well
return(do.call("polym", c(m, degree = degree, raw = raw, list(coefs=coefs))))
  }
  if (degree < 1)
    stop("'degree' must be at least 1")
  if (anyNA(x))
    stop("missing values are not allowed in 'poly'")
  n <- degree + 1
  if (raw) {
    Z <- outer(x, 1L:degree, "^")
    colnames(Z) <- 1L:degree
    attr(Z, "degree") <- 1L:degree
    class(Z) <- c("poly", "matrix")
    return(Z)
  }
  if (is.null(coefs)) {
    if (degree >= length(unique(x)))
      stop("'degree' must be less than number of unique points")
    xbar <- mean(x)
    x <- x - xbar
    X <- outer(x, seq_len(n) - 1, "^")
    QR <- qr(X)
    if (QR$rank < degree)
      stop("'degree' must be less than number of unique points")
    z <- QR$qr
    z <- z * (row(z) == col(z))
    raw <- qr.qy(QR, z)
    norm2 <- colSums(raw^2)
    alpha <- (colSums(x * raw^2)/norm2 + xbar)[1L:degree]
    Z <- raw/rep(sqrt(norm2), each = length(x))
    colnames(Z) <- 1L:n - 1L
    Z <- Z[, -1, drop = FALSE]
    attr(Z, "degree") <- 1L:degree
    attr(Z, "coefs") <- list(alpha = alpha, norm2 = c(1,
                                                      norm2))
    class(Z) <- c("poly", "matrix")
  }
  else {
    alpha <- coefs$alpha
    norm2 <- coefs$norm2
    Z <- matrix(, length(x), n)
    Z[, 1] <- 1
    Z[, 2] <- x - alpha[1L]
    if (degree > 1)
      for (i in 2:degree) Z[, i + 1] <- (x - alpha[i]) *
      Z[, i] - (norm2[i + 1]/norm2[i]) * Z[, i - 1]
    Z <- Z/rep(sqrt(norm2[-1L]), each = length(x))
    colnames(Z) <- 0:degree
    Z <- Z[, -1, drop = FALSE]
    attr(Z, "degree") <- 1L:degree
    attr(Z, "coefs") <- list(alpha = alpha, norm2 = norm2)
    class(Z) <- c("poly", "matrix")
  }
  Z
}
################################################

### predict.poly ####
predict.poly <- function (object, newdata, ...)
{
  if (missing(newdata))
    return(object)
  if (is.null(attr(object, "coefs")))
    poly(newdata, degree = max(attr(object, "degree")), raw = TRUE)
  else poly(newdata, degree = max(attr(object, "degree")),
            coefs = attr(object, "coefs"))
}
######################

______________________________________________
R-help@r-project.org mailing list -- To UNSUBSCRIBE and more, see
https://stat.ethz.ch/mailman/listinfo/r-help
PLEASE do read the posting guide http://www.R-project.org/posting-guide.html
and provide commented, minimal, self-contained, reproducible code.

Reply via email to