Hi folks-

This is following up on a post from last year. When estimating state  
probabilities with ace() for discrete characters, you can get  
negative probabilities. They still sum to 1 for each internal node,  
but this just didn't make sense to me. See example below. Yes, this  
is a contrived and simple example, but I have now observed this  
behavior with several real datasets. Since the actual state  
probabilities as per the Pagel (1994) algorithm should be summations  
of probabilities over all descendent nodes, I don't see how these can  
be - in principle - negative.

The problem seems to be due to the fact that the rate estimates are  
allowed to be negative, as ace() is currently implemented; maybe  
something with NaN or -Inf values in the optimization.

Anyway, I modified ace and obtained results that seem to make sense,  
by optimizing log-transformed rate parameters, thus enforcing a  
constraint that rates are greater than zero. This seems to work.  
Also, optimize is supposedly more stable than nlm or optim (or so I  
thought) for 1-dimensional optimization, so maybe it should use this  
for a 1-rate markov model. As a quick check, I've pasted a modified  
version of ace below in case anyone is interested.

I think this *can* change results relative to the old version.

Example of the problem:

library(laser);
data(skinktree);
data <- c(1, 2, 1, 1, 2, 2, 2, 2, 1, 2, 2, 2, 2, 1, 1, 1, 1, 2, 2,  
1,1, 2, 2);
anc.states <- ace(data, skinktree, type='discrete', model='ER', ip=.1);

# Bad
anc.states$lik.anc;
anc.states$rates

###
# check with new ace, below:

states_new <- ace.discrete(data, skinktree, model='ER', ip=.1);
states_new;

ace.discrete <- function (x, phy,  method = "ML", CI = TRUE, model =  
"ER", scaled = TRUE, kappa = 1, ip = 0.1)
{
     if (class(phy) != "phylo")
         stop("object \"phy\" is not of class \"phylo\".")
     if (is.null(phy$edge.length))
         stop("tree has no branch lengths")

     nb.tip <- length(phy$tip.label)
     nb.node <- phy$Nnode
     if (nb.node != nb.tip - 1)
         stop("\"phy\" is not rooted AND fully dichotomous.")
     if (length(x) != nb.tip)
         stop("length of phenotypic and of phylogenetic data do not  
match.")
     if (!is.null(names(x))) {
         if (all(names(x) %in% phy$tip.label))
             x <- x[phy$tip.label]
         else warning("the names of argument \"x\" and the tip labels  
of the tree\ndid not match: the former were ignored in the analysis.")
     }
     obj <- list()
     if (kappa != 1)
         phy$edge.length <- phy$edge.length^kappa


         if (method != "ML")
             stop("only ML estimation is possible for discrete  
characters.")
         if (!is.factor(x))
             x <- factor(x)
         nl <- nlevels(x)
         lvls <- levels(x)
         x <- as.integer(x)
         if (is.character(model)) {
             rate <- matrix(NA, nl, nl)
             if (model == "ER")
                 np <- rate[] <- 1
             if (model == "ARD") {
                 np <- nl * (nl - 1)
                 rate[col(rate) != row(rate)] <- 1:np
             }
             if (model == "SYM") {
                 np <- nl * (nl - 1)/2
                 rate[col(rate) < row(rate)] <- 1:np
                 rate <- t(rate)
                 rate[col(rate) < row(rate)] <- 1:np
             }
         }
         else {
             if (ncol(model) != nrow(model))
                 stop("the matrix given as `model' is not square")
             if (ncol(model) != nl)
                 stop("the matrix `model' must have as many rows\nas  
the number of categories in `x'")
             rate <- model
             np <- max(rate)
         }
         index.matrix <- rate
         index.matrix[cbind(1:nl, 1:nl)] <- NA
         rate[cbind(1:nl, 1:nl)] <- 0
         rate[rate == 0] <- np + 1
         liks <- matrix(0, nb.tip + nb.node, nl)
         for (i in 1:nb.tip) liks[i, x[i]] <- 1
         phy <- reorder(phy, "pruningwise")
         Q <- matrix(0, nl, nl)
         dev <- function(p, output.liks = FALSE) {
                        p <- exp(p);
             Q[] <- c(p, 0)[rate]
             diag(Q) <- -rowSums(Q)
             for (i in seq(from = 1, by = 2, length.out = nb.node)) {
                 j <- i + 1
                 anc <- phy$edge[i, 1]
                 des1 <- phy$edge[i, 2]
                 des2 <- phy$edge[j, 2]
                 tmp <- eigen(Q * phy$edge.length[i], symmetric = FALSE)
                 P1 <- tmp$vectors %*% diag(exp(tmp$values)) %*%
                   solve(tmp$vectors)
                 tmp <- eigen(Q * phy$edge.length[j], symmetric = FALSE)
                 P2 <- tmp$vectors %*% diag(exp(tmp$values)) %*%
                   solve(tmp$vectors)
                 liks[anc, ] <- P1 %*% liks[des1, ] * P2 %*% liks[des2,
                   ]
             }

             if (output.liks)
                 return(liks[-(1:nb.tip), ])
             LH <- -2 * log(sum(liks[nb.tip + 1, ]));

             return(LH);
         }

                out <- nlm(function(p) dev(p), p = rep(log(ip), length.out =  
np), hessian = TRUE);


         obj$rates <- exp(out$estimate);
         obj$loglik <- -out$minimum/2;

         if (any(out$gradient == 0))
             warning("The likelihood gradient seems flat in at least  
one dimension (gradient null)\ncannot compute the standard-errors of  
the transition rates.\n")
        else obj$se <- sqrt(diag(solve(out$hessian)))
         obj$index.matrix <- index.matrix
         if (CI) {
             lik.anc <- dev(log(obj$rates), TRUE)
             lik.anc <- lik.anc/rowSums(lik.anc)
             colnames(lik.anc) <- lvls
             obj$lik.anc <- lik.anc
         }

     obj$call <- match.call()
     class(obj) <- "ace"
     obj
}


Dan Rabosky
Department of Ecology and Evolutionary Biology &
Fuller Evolutionary Biology Program
Cornell Lab of Ornithology
Cornell University
Ithaca, NY 14853-2701
DLR32Xcornell.edu (X = @)
ph 607 592 4636
fax 607 255 8088

new website:
http://www.eeb.cornell.edu/Rabosky/dan/main.html





        [[alternative HTML version deleted]]

_______________________________________________
R-sig-phylo mailing list
[email protected]
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo

Reply via email to