Re: [R-sig-phylo] cut a tree in a given time interval

Nick Matzke Wed, 13 Jul 2011 13:18:21 -0700

Oops, left that out.  Here it is as text and an attached file.


You will have to do
library(ape)

and maybe

library(phangorn)

...ahead of time (hopefully not others...)

============



chainsaw <- function(tr, timepoint=10)
    {

# Take a tree and saw it off evenly across a certaintimepoint.# This removes any tips above the timepoint, andreplaces them

    # with a single tip representing the lineage crossing
    # the timepoint (with a new tip name).

    # Get the tree in a table
    tr_table = prt(tr, printflag=FALSE)

    # Find the tips that are less than 10 my old and drop them

TF_exists_more_recently_than_10mya = tr_table$time_bp <timepoint


    # Get the corresponding labels

labels_for_tips_existing_more_recently_than_10mya =tr_table$label[ TF_exists_more_recently_than_10mya == TRUE ]


    ###########################################
    # Draft chainsaw function
    ###########################################
    # loop through the branches that cross 10 mya

# get a list of the edge start/stops in the phylogeny'sedges

    edge_times_bp = get_edge_times_before_present(tr)

    # which of these branches cross 10 mya?

edges_start_earlier_than_10mya = edge_times_bp[, 1] >timepointedges_end_later_than_10mya = edge_times_bp[, 2] <=timepointedges_to_chainsaw = edges_start_earlier_than_10mya +edges_end_later_than_10mya == 2

# then, for each of these edges, figure out how manytips exist descending from it

    nodes_to_chainsaw = tr$edge[, 2][edges_to_chainsaw]

nodes_to_chainsaw = nodes_to_chainsaw[nodes_to_chainsaw> length(tr$tip.label)]


    # create a copy of the tree to chainsaw
    tree_to_chainsaw = tr

    for (i in 1:length(nodes_to_chainsaw))
        {
        tmp_subtree = extract.clade(tr, nodes_to_chainsaw[i])
        #print(tmp_subtree$tip.label)

        tmp_number_of_tips = length(tmp_subtree$tip.label)
        #print(tmp_number_of_tips)

        # number of tips to drop = (numtips -1)
        numtips_to_drop = tmp_number_of_tips - 1

        # tips_to_drop
        tmp_labels = tmp_subtree$tip.label

        labels_to_drop = tmp_labels[1:numtips_to_drop]

        # new label
        label_kept_num = length(tmp_labels)
        label_kept = tmp_labels[label_kept_num]

new_label = paste("CA_", label_kept, "+",numtips_to_drop, "_tips", sep="")

tree_to_chainsaw$tip.label[tree_to_chainsaw$tip.label ==label_kept] = new_label


        # chop off e.g. 2 of the 3 tips

tree_to_chainsaw = drop.tip(tree_to_chainsaw,labels_to_drop)


        }
    #plot(tree_to_chainsaw)
    #axisPhylo()

tree_to_chainsaw_table = prt(tree_to_chainsaw,printflag=FALSE)

tree_to_chainsaw_table_tips_TF_time_bp_LT_10my =tree_to_chainsaw_table$time_bp < timepoint

tmp_edge_lengths =tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]

times_bp_for_edges_to_chainsaw =tree_to_chainsaw_table$time_bp[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]


    adjustment = times_bp_for_edges_to_chainsaw - timepoint

    revised_tmp_edge_lengths = tmp_edge_lengths + adjustment

tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]= revised_tmp_edge_lengths


    # revised
    ordered_nodenames = get_nodenums(tree_to_chainsaw)

parent_branches =get_indices_where_list1_occurs_in_list2(ordered_nodenames,tree_to_chainsaw$edge[,2])


    NA_false = is.not.na(tree_to_chainsaw_table$edge.length)

tree_to_chainsaw$edge.length[parent_branches[NA_false]]= tree_to_chainsaw_table$edge.length[NA_false]


    return(tree_to_chainsaw)
    }


# print tree in hierarchical format

prt <- function(t, printflag=TRUE, relabel_nodes = FALSE,time_bp_digits=7)

    {
    # assemble beginning table

    # check if internal node labels exist
    if ("node.label" %in% attributes(t)$names == FALSE)
        {
        rootnum = get_nodenum_structural_root(t)

new_node_labels = paste("inNode",rootnum:(rootnum+t$Nnode-1), sep="")

        t$node.label = new_node_labels
        }

    # or manually relabel the internal nodes, if desired
    if (relabel_nodes == TRUE)
        {
        rootnum = get_nodenum_structural_root(t)

new_node_labels = paste("inNode",rootnum:(rootnum+t$Nnode-1), sep="")

        t$node.label = new_node_labels
        }

    labels = c(t$tip.label, t$node.label)
    ordered_nodenames = get_nodenums(t)
    #nodenums = 1:length(labels)
    node.types1 = rep("tip", length(t$tip.label))
    node.types2 = rep("internal", length(t$node.label))
    node.types2[1] = "root"
    node.types = c(node.types1, node.types2)

    # These are the index numbers of the edges below each node

parent_branches =get_indices_where_list1_occurs_in_list2(ordered_nodenames,t$edge[,2])

    #parent_edges = parent_branches
    brlen_to_parent = t$edge.length[parent_branches]

    parent_nodes = t$edge[,1][parent_branches]

daughter_nodes = lapply(ordered_nodenames,get_daughters, t)


    # print out the structural root, if desired
    root_nodenum = get_nodenum_structural_root(t)
    tmpstr = paste("prt(t): root=", root_nodenum, "\n", sep="")
    prflag(tmpstr, printflag=printflag)

levels_for_nodes = unlist(lapply(ordered_nodenames,get_level, t))

    #tmplevel = get_level(23, t)
    #print(tmplevel)


    #height above root
    hts_at_end_of_branches_aka_at_nodes = t$edge.length
    hts_at_end_of_branches_aka_at_nodes = get_all_node_ages(t)
    h = hts_at_end_of_branches_aka_at_nodes

    # times before present, below (ultrametric!) tips

# numbers are positive, i.e. in millions of yearsbefore present

    #                       i.e. mybp, Ma
    times_before_present = get_max_height_tree(t) - h


    # fill in the ages of each node for the edges
    edge_ages = t$edge
    edge_ages[,1] = h[t$edge[,1]]    # bottom of branch
    edge_ages[,2] = h[t$edge[,2]]    # top of branch

# fill in the times before present of each node for theedges

    edge_times_bp = t$edge

edge_times_bp[,1] = times_before_present[t$edge[,1]]# bottom of branchedge_times_bp[,2] = times_before_present[t$edge[,2]]# top of branch

tmpdtf = cbind(1:length(ordered_nodenames),ordered_nodenames, levels_for_nodes, node.types,parent_branches, brlen_to_parent, parent_nodes,daughter_nodes, h, round(times_before_present,digits=time_bp_digits), labels)


    dtf = as.data.frame(tmpdtf, row.names=NULL)
    # nd = node

    # edge.length is the same as brlen_2_parent

names(dtf) = c("node", "ord_ndname", "node_lvl","node.type", "parent_br", "edge.length", "ancestor","daughter_nds", "node_ht", "time_bp", "label")


    # convert the cols from class "list" to some natural class
    dtf = unlist_dtf_cols(dtf, printflag=FALSE)

    # print if desired
    prflag(dtf, printflag=printflag)

    #tree_strings = c()
    #root_str = get_node_info(root_nodenum, t)
    return(dtf)
    }



get_edge_times_before_present <- function(t)
    {
    #height above root
    hts_at_end_of_branches_aka_at_nodes = t$edge.length
    hts_at_end_of_branches_aka_at_nodes = get_all_node_ages(t)
    h = hts_at_end_of_branches_aka_at_nodes

    # times before present, below (ultrametric!) tips

# numbers are positive, i.e. in millions of yearsbefore present

    #                       i.e. mybp, Ma
    times_before_present = get_max_height_tree(t) - h


    # fill in the ages of each node for the edges
    edge_ages = t$edge
    edge_ages[,1] = h[t$edge[,1]]    # bottom of branch
    edge_ages[,2] = h[t$edge[,2]]    # top of branch

# fill in the times before present of each node for theedges

    edge_times_bp = t$edge

edge_times_bp[,1] = times_before_present[t$edge[,1]]# bottom of branchedge_times_bp[,2] = times_before_present[t$edge[,2]]# top of branch


    return(edge_times_bp)
    }

extract.clade <- function(phy, node, root.edge = 0,interactive = FALSE)

{
    Ntip <- length(phy$tip.label)
    ROOT <- Ntip + 1
    Nedge <- dim(phy$edge)[1]
    wbl <- !is.null(phy$edge.length)
    if (interactive) node <- identify(phy)$nodes else {
        if (length(node) > 1) {
            node <- node[1]

warning("only the first value of 'node' hasbeen considered")

        }
        if (is.character(node)) {
            if (is.null(phy$node.label))
                stop("the tree has no node labels")
            node <- which(phy$node.label %in% node) + Ntip
        }
        if (node <= Ntip)

stop("node number must be greater than thenumber of tips")

    }
    if (node == ROOT) return(phy)
    phy <- reorder(phy) # insure it is in cladewise order
    root.node <- which(phy$edge[, 2] == node)
    start <- root.node + 1 # start of the clade looked for
    anc <- phy$edge[root.node, 1] # the ancestor of 'node'

next.anc <- which(phy$edge[-(1:start), 1] <= anc) #find the next occurence of 'anc' or an 'older' node

keep <- if (length(next.anc)) start + 0:(next.anc[1] -1) else start:Nedge


    if (root.edge) {
        NewRootEdge <- phy$edge.length[root.node]
        root.edge <- root.edge - 1
        while (root.edge) {
            if (anc == ROOT) break
            i <- which(phy$edge[, 2] ==  anc)
            NewRootEdge <- NewRootEdge + phy$edge.length[i]
            root.edge <- root.edge - 1
            anc <- phy$edge[i, 1]
        }
        if (root.edge && !is.null(phy$root.edge))
            NewRootEdge <- NewRootEdge + phy$root.edge
        phy$root.edge <- NewRootEdge
    }

    phy$edge <- phy$edge[keep, ]
    if (wbl) phy$edge.length <- phy$edge.length[keep]
    TIPS <- phy$edge[, 2] <= Ntip
    tip <- phy$edge[TIPS, 2]

phy$tip.label <- phy$tip.label[sort(tip)] # <- addedsort to avoid shuffling of tip labels (2010-07-21)

    ## keep the ordering so no need to reorder tip.label:
    phy$edge[TIPS, 2] <- order(tip)
    if (!is.null(phy$node.label))

phy$node.label <-phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]

    Ntip <- length(phy$tip.label)
    phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L
    ## The block below renumbers the nodes so that they conform
    ## to the "phylo" format -- same as in root()
    newNb <- integer(Ntip + phy$Nnode)
    newNb[node] <- Ntip + 1L
    sndcol <- phy$edge[, 2] > Ntip

## executed from right to left, so newNb is modifiedbefore phy$edge:

    phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
        (Ntip + 2):(Ntip + phy$Nnode)
    phy$edge[, 1] <- newNb[phy$edge[, 1]]
    phy
}



# this returns the NUMBERS identifying each node
get_nodenums <- function(t)
    {

# get just the unique node numbers from the edge list(left column: start node; right column: end node):

    nodenames = unique(c(t$edge))
    ordered_nodenames = nodenames[order(nodenames)]
    return(ordered_nodenames)
    }


get_nodenum_structural_root <- function(t, print_nodenum=FALSE)
        {
        #numnodes = length(t$tip.label) + length(t$node.label)
        #ordered_nodes = 1:length(numnodes)
        
        ordered_nodes = get_nodenums(t)

        root_nodenums_list = c()
        for (n in 1:length(ordered_nodes))
                {
                tmpnode = ordered_nodes[n]
                if (tmpnode %in% t$edge[,2])
                        {
                        blah = TRUE
                        }
                else
                        {
                        if (print_nodenum == TRUE)
                                {

cat("get_nodenum_structural_root(): Root nodenum = ",tmpnode, sep="")

                                }
                        root_nodenums_list = c(root_nodenums_list, tmpnode)
                        }
                }
        return(root_nodenums_list)
        }

# NOTE!!! THESE MATCH FUNCTIONS JUST RETURN THE *FIRST*MATCH, *NOT* ALL MATCHES

# (argh)
# return indices in 2nd list matching the first list
# It WILL return one match for each item in the list, though...

get_indices_where_list1_occurs_in_list2 <- function(list1,list2)

    {
    match_indices = match(list1, list2)
    return(match_indices)
    }


is.not.na <- function(x)
    {
    return(is.na(x) == FALSE)
    }



=========================




============

On 7/13/11 4:42 AM, ppi...@uniroma3.it wrote:

Hi Nick,

I try to run but "get_nodenum_structural_root" is a
non defined function; I miss smething?

Due to my email formatting I spent time to re-adjust
commenting line that were paragraphed

could you send me the .R file source?
..together with get_nodenum_structural_root
thanks in advance

ciao
paolo












Here's chainsaw().  It also requires sourcing a few
other
functions.  Please cite me if you use it :-).

=========================

chainsaw<- function(tr, timepoint=10)
        {
        # Take a tree and saw it off evenly across a certain
timepoint.
        # This removes any tips above the timepoint, and
replaces them
        # with a single tip representing the lineage crossing
        # the timepoint (with a new tip name).

        # Get the tree in a table
        tr_table = prt(tr, printflag=FALSE)

        # Find the tips that are less than 10 my old and drop
them
        TF_exists_more_recently_than_10mya = tr_table$time_bp
<
timepoint

        # Get the corresponding labels
        labels_for_tips_existing_more_recently_than_10mya =
tr_table$label[ TF_exists_more_recently_than_10mya ==
TRUE ]

        ###########################################
        # Draft chainsaw function
        ###########################################
        # loop through the branches that cross 10 mya

        # get a list of the edge start/stops in the
phylogeny's edges
        edge_times_bp = get_edge_times_before_present(tr)

        # which of these branches cross 10 mya?
        edges_start_earlier_than_10mya = edge_times_bp[, 1]>
timepoint
        edges_end_later_than_10mya = edge_times_bp[, 2]<=
timepoint
        edges_to_chainsaw = edges_start_earlier_than_10mya +
edges_end_later_than_10mya == 2

        # then, for each of these edges, figure out how many
tips
exist descending from it
        nodes_to_chainsaw = tr$edge[, 2][edges_to_chainsaw]
        nodes_to_chainsaw =
nodes_to_chainsaw[nodes_to_chainsaw>
length(tr$tip.label)]

        # create a copy of the tree to chainsaw
        tree_to_chainsaw = tr

        for (i in 1:length(nodes_to_chainsaw))
                {
                tmp_subtree = extract.clade(tr,
nodes_to_chainsaw[i])
                #print(tmp_subtree$tip.label)

                tmp_number_of_tips = length(tmp_subtree$tip.label)
                #print(tmp_number_of_tips)

                # number of tips to drop = (numtips -1)
                numtips_to_drop = tmp_number_of_tips - 1

                # tips_to_drop
                tmp_labels = tmp_subtree$tip.label

                labels_to_drop = tmp_labels[1:numtips_to_drop]

                # new label
                label_kept_num = length(tmp_labels)
                label_kept = tmp_labels[label_kept_num]
                new_label = paste("CA_", label_kept, "+",
numtips_to_drop,
"_tips", sep="")
                tree_to_chainsaw$tip.label[tree_to_chainsaw$tip.label
==
label_kept] = new_label

                # chop off e.g. 2 of the 3 tips
                tree_to_chainsaw = drop.tip(tree_to_chainsaw,
labels_to_drop)

                }
        #plot(tree_to_chainsaw)
        #axisPhylo()

        tree_to_chainsaw_table = prt(tree_to_chainsaw,
printflag=FALSE)

        tree_to_chainsaw_table_tips_TF_time_bp_LT_10my =
tree_to_chainsaw_table$time_bp<  timepoint


        tmp_edge_lengths =
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]

        times_bp_for_edges_to_chainsaw =
tree_to_chainsaw_table$time_bp[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]

        adjustment = times_bp_for_edges_to_chainsaw -
timepoint

        revised_tmp_edge_lengths = tmp_edge_lengths +
adjustment

        
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
= revised_tmp_edge_lengths

        # revised
        ordered_nodenames = get_nodenums(tree_to_chainsaw)
        parent_branches =
get_indices_where_list1_occurs_in_list2(ordered_nodenames,
tree_to_chainsaw$edge[,2])

        NA_false =
is.not.na(tree_to_chainsaw_table$edge.length)

        tree_to_chainsaw$edge.length[parent_branches[NA_false]]
=
tree_to_chainsaw_table$edge.length[NA_false]

        return(tree_to_chainsaw)
        }


# print tree in hierarchical format
prt<- function(t, printflag=TRUE, relabel_nodes =
FALSE,
time_bp_digits=7)
        {
        # assemble beginning table

        # check if internal node labels exist
        if ("node.label" %in% attributes(t)$names == FALSE)
                {
                rootnum = get_nodenum_structural_root(t)

                new_node_labels = paste("inNode",
rootnum:(rootnum+t$Nnode-1), sep="")
                t$node.label = new_node_labels
                }

        # or manually relabel the internal nodes, if desired
        if (relabel_nodes == TRUE)
                {
                rootnum = get_nodenum_structural_root(t)

                new_node_labels = paste("inNode",
rootnum:(rootnum+t$Nnode-1), sep="")
                t$node.label = new_node_labels
                }

        labels = c(t$tip.label, t$node.label)
        ordered_nodenames = get_nodenums(t)
        #nodenums = 1:length(labels)
        node.types1 = rep("tip", length(t$tip.label))
        node.types2 = rep("internal", length(t$node.label))
        node.types2[1] = "root"
        node.types = c(node.types1, node.types2)

        # These are the index numbers of the edges below each
node
        parent_branches =
get_indices_where_list1_occurs_in_list2(ordered_nodenames,
t$edge[,2])
        #parent_edges = parent_branches
        brlen_to_parent = t$edge.length[parent_branches]

        parent_nodes = t$edge[,1][parent_branches]
        daughter_nodes = lapply(ordered_nodenames,
get_daughters, t)

        # print out the structural root, if desired
        root_nodenum = get_nodenum_structural_root(t)
        tmpstr = paste("prt(t): root=", root_nodenum, "\n",
sep="")
        prflag(tmpstr, printflag=printflag)

        levels_for_nodes = unlist(lapply(ordered_nodenames,
get_level, t))
        #tmplevel = get_level(23, t)
        #print(tmplevel)


        #height above root
        hts_at_end_of_branches_aka_at_nodes = t$edge.length
        hts_at_end_of_branches_aka_at_nodes =
get_all_node_ages(t)
        h = hts_at_end_of_branches_aka_at_nodes

        # times before present, below (ultrametric!) tips
        # numbers are positive, i.e. in millions of years
before
present
        #                       i.e. mybp, Ma
        times_before_present = get_max_height_tree(t) - h


        # fill in the ages of each node for the edges
        edge_ages = t$edge
        edge_ages[,1] = h[t$edge[,1]]   # bottom of branch
        edge_ages[,2] = h[t$edge[,2]]   # top of branch


        # fill in the times before present of each node for
the edges
        edge_times_bp = t$edge
        edge_times_bp[,1] =
times_before_present[t$edge[,1]]        #
bottom of branch
        edge_times_bp[,2] =
times_before_present[t$edge[,2]]        # top
of branch



        tmpdtf = cbind(1:length(ordered_nodenames),
ordered_nodenames, levels_for_nodes, node.types,
parent_branches, brlen_to_parent, parent_nodes,
daughter_nodes, h, round(times_before_present,
digits=time_bp_digits), labels)

        dtf = as.data.frame(tmpdtf, row.names=NULL)
        # nd = node

        # edge.length is the same as brlen_2_parent
        names(dtf) = c("node", "ord_ndname", "node_lvl",
"node.type", "parent_br", "edge.length", "ancestor",
"daughter_nds", "node_ht", "time_bp", "label")

        # convert the cols from class "list" to some natural
class
        dtf = unlist_dtf_cols(dtf, printflag=FALSE)

        # print if desired
        prflag(dtf, printflag=printflag)

        #tree_strings = c()
        #root_str = get_node_info(root_nodenum, t)
        return(dtf)
        }



get_edge_times_before_present<- function(t)
        {
        #height above root
        hts_at_end_of_branches_aka_at_nodes = t$edge.length
        hts_at_end_of_branches_aka_at_nodes =
get_all_node_ages(t)
        h = hts_at_end_of_branches_aka_at_nodes

        # times before present, below (ultrametric!) tips
        # numbers are positive, i.e. in millions of years
before
present
        #                       i.e. mybp, Ma
        times_before_present = get_max_height_tree(t) - h


        # fill in the ages of each node for the edges
        edge_ages = t$edge
        edge_ages[,1] = h[t$edge[,1]]   # bottom of branch
        edge_ages[,2] = h[t$edge[,2]]   # top of branch

        # fill in the times before present of each node for
the edges
        edge_times_bp = t$edge
        edge_times_bp[,1] =
times_before_present[t$edge[,1]]        #
bottom of branch
        edge_times_bp[,2] =
times_before_present[t$edge[,2]]        # top
of branch

        return(edge_times_bp)
        }


extract.clade<- function(phy, node, root.edge = 0,
interactive = FALSE)
{
      Ntip<- length(phy$tip.label)
      ROOT<- Ntip + 1
      Nedge<- dim(phy$edge)[1]
      wbl<- !is.null(phy$edge.length)
      if (interactive) node<- identify(phy)$nodes else
{
          if (length(node)>  1) {
              node<- node[1]
              warning("only the first value of 'node'
has
been considered")
          }
          if (is.character(node)) {
              if (is.null(phy$node.label))
                  stop("the tree has no node labels")
              node<- which(phy$node.label %in% node) +
Ntip
          }
          if (node<= Ntip)
              stop("node number must be greater than
the
number of tips")
      }
      if (node == ROOT) return(phy)
      phy<- reorder(phy) # insure it is in cladewise
order
      root.node<- which(phy$edge[, 2] == node)
      start<- root.node + 1 # start of the clade
looked for
      anc<- phy$edge[root.node, 1] # the ancestor of
'node'
      next.anc<- which(phy$edge[-(1:start), 1]<= anc)
#
find the next occurence of 'anc' or an 'older' node

      keep<- if (length(next.anc)) start +
0:(next.anc[1] -
1) else start:Nedge

      if (root.edge) {
          NewRootEdge<- phy$edge.length[root.node]
          root.edge<- root.edge - 1
          while (root.edge) {
              if (anc == ROOT) break
              i<- which(phy$edge[, 2] ==  anc)
              NewRootEdge<- NewRootEdge +
phy$edge.length[i]
              root.edge<- root.edge - 1
              anc<- phy$edge[i, 1]
          }
          if (root.edge&&  !is.null(phy$root.edge))
              NewRootEdge<- NewRootEdge +
phy$root.edge
          phy$root.edge<- NewRootEdge
      }

      phy$edge<- phy$edge[keep, ]
      if (wbl) phy$edge.length<- phy$edge.length[keep]
      TIPS<- phy$edge[, 2]<= Ntip
      tip<- phy$edge[TIPS, 2]
      phy$tip.label<- phy$tip.label[sort(tip)] #<-
added
sort to avoid shuffling of tip labels (2010-07-21)
      ## keep the ordering so no need to reorder
tip.label:
      phy$edge[TIPS, 2]<- order(tip)
      if (!is.null(phy$node.label))
          phy$node.label<-
phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
      Ntip<- length(phy$tip.label)
      phy$Nnode<- dim(phy$edge)[1] - Ntip + 1L
      ## The block below renumbers the nodes so that
they conform
      ## to the "phylo" format -- same as in root()
      newNb<- integer(Ntip + phy$Nnode)
      newNb[node]<- Ntip + 1L
      sndcol<- phy$edge[, 2]>  Ntip
      ## executed from right to left, so newNb is
modified
before phy$edge:
      phy$edge[sndcol, 2]<- newNb[phy$edge[sndcol, 2]]
<-
          (Ntip + 2):(Ntip + phy$Nnode)
      phy$edge[, 1]<- newNb[phy$edge[, 1]]
      phy
}



# this returns the NUMBERS identifying each node
get_nodenums<- function(t)
        {
        # get just the unique node numbers from the edge list
(left
column: start node; right column: end node):
        nodenames = unique(c(t$edge))
        ordered_nodenames = nodenames[order(nodenames)]
        return(ordered_nodenames)
        }

# NOTE!!! THESE MATCH FUNCTIONS JUST RETURN THE
*FIRST*
MATCH, *NOT* ALL MATCHES
# (argh)
# return indices in 2nd list matching the first list
# It WILL return one match for each item in the list,
though...
get_indices_where_list1_occurs_in_list2<-
function(list1,
list2)
        {
        match_indices = match(list1, list2)
        return(match_indices)
        }


is.not.na<- function(x)
        {
        return(is.na(x) == FALSE)
        }



=========================



On 7/12/11 10:06 PM, David Bapst wrote:

Do you want a function that gives you a tree of taxa
observed in a time bin
(like, in the fossil record, as in Ruta et al.,
2007)? Or gives you the tree
of the lineages extant at point in time based on
some global phylogeny? If
that is what you want, the following function I
wrote for doing time-slices
on trees should do that. My most recent version (not
posted) has some
additions so that the same time-slice can be taken
across multiple trees
that may have the root in different place. (These
modifications use the
earliest fossil species as an anchor time.)

This version of the code seems to work okay, for
example try:
tree1<-rtree(500)
timeslice.phy(tree1,slice.time=4,plot=T)

Let me know if this works alright,
-Dave, UChicago


timeslice.phy<-function(ttree,slice.time,plot=T){
      #take a phylogeny and produce a phylogenetic
'slice' at time X
          #makes an ultrametric tree, as if made of
lineages extant at time X
          #where X is a point in time after root time
= 0
      require(ape)
      tslice<-slice.time #time from root to slice
time
      #first let's drop all edges that branch later
than the slice
      #make them all single lineages by dropping all
but one taxon
      dnode<-dist.nodes(ttree)[,Ntip(ttree)+1]
      #identify the ancestor nodes of edges which
cross the tslice
      cedge<-which(sapply(1:Nedge(ttree),function(x)
any(ttree$edge[x,1]==which(dnode<tslice))
              &
any(ttree$edge[x,2]==which(dnode>=tslice))))
      droppers<-numeric()
      for(i in 1:length(cedge)){
          desc<-ttree$edge[cedge[i],2]
          if(desc>Ntip(ttree)){    #if an internal
edge that goes past the
tslice
              desctip<-unlist(prop.part(ttree)[desc-Ntip(ttree)])
    #drop all
but one tip
              droppers<-c(droppers,desctip[-1])
          }}
      stree<-drop.tip(ttree,droppers)
      #which edges cross over tslice?
      dnode<-dist.nodes(stree)[,Ntip(stree)+1]
      cedge<-sapply(1:Nedge(stree),function(x)
any(stree$edge[x,2]==which(dnode>=tslice)))
      cnode_depth<-dnode[stree$edge[cedge,1]]
      stree$edge.length[cedge]<-tslice-cnode_depth
      #last, let's drop all terminal taxa that are
less than the tslice
      dnode<-dist.nodes(stree)[,Ntip(stree)+1]
      droppers<-which((dnode[1:Ntip(stree)]+0.1)<tslice)
      stree<-drop.tip(stree,droppers)

if(plot){layout(matrix(1:2,,2));plot(ladderize(ttree),show.tip.label=F);axisPhylo();plot(stree,show.tip.label=F)}
      stree
      }

On Tue, Jul 12, 2011 at 3:19 PM,<ppi...@uniroma3.it>
  wrote:

Hi all,
someone knows a smart coding to cut a tree in order
to
retain only taxa present in a given time interval
bin?

Thanks in advance for any suggestion
best
paolo

_______________________________________________
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo


--
====================================================
Nicholas J. Matzke
Ph.D. Candidate, Graduate Student Researcher

Huelsenbeck Lab
Center for Theoretical Evolutionary Genomics
4151 VLSB (Valley Life Sciences Building)
Department of Integrative Biology
University of California, Berkeley

Graduate Student Instructor, IB200B
Principles of Phylogenetics: Ecology and Evolution
http://ib.berkeley.edu/courses/ib200b/
http://phylo.wikidot.com/


Lab websites:
http://ib.berkeley.edu/people/lab_detail.php?lab=54
http://fisher.berkeley.edu/cteg/hlab.html
Dept. personal page:
http://ib.berkeley.edu/people/students/person_detail.php?person=370
Lab personal page:
http://fisher.berkeley.edu/cteg/members/matzke.html
Lab phone: 510-643-6299
Dept. fax: 510-643-6264

Cell phone: 510-301-0179
Email: mat...@berkeley.edu

Mailing address:
Department of Integrative Biology
3060 VLSB #3140
Berkeley, CA 94720-3140

-----------------------------------------------------
"[W]hen people thought the earth was flat, they were
wrong.
When people thought the earth was spherical, they were
wrong. But if you think that thinking the earth is
spherical
is just as wrong as thinking the earth is flat, then
your
view is wronger than both of them put together."

Isaac Asimov (1989). "The Relativity of Wrong." The
Skeptical Inquirer, 14(1), 35-44. Fall 1989.
http://chem.tufts.edu/AnswersInScience/RelativityofWrong.htm

_______________________________________________
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo


--
====================================================
Nicholas J. Matzke
Ph.D. Candidate, Graduate Student Researcher

Huelsenbeck Lab
Center for Theoretical Evolutionary Genomics
4151 VLSB (Valley Life Sciences Building)
Department of Integrative Biology
University of California, Berkeley

Graduate Student Instructor, IB200B
Principles of Phylogenetics: Ecology and Evolution
http://ib.berkeley.edu/courses/ib200b/
http://phylo.wikidot.com/


Lab websites:
http://ib.berkeley.edu/people/lab_detail.php?lab=54
http://fisher.berkeley.edu/cteg/hlab.html

Dept. personal page:http://ib.berkeley.edu/people/students/person_detail.php?person=370Lab personal page:http://fisher.berkeley.edu/cteg/members/matzke.html

Lab phone: 510-643-6299
Dept. fax: 510-643-6264

Cell phone: 510-301-0179
Email: mat...@berkeley.edu

Mailing address:
Department of Integrative Biology
3060 VLSB #3140
Berkeley, CA 94720-3140

-----------------------------------------------------

"[W]hen people thought the earth was flat, they were wrong.When people thought the earth was spherical, they werewrong. But if you think that thinking the earth is sphericalis just as wrong as thinking the earth is flat, then yourview is wronger than both of them put together."

Isaac Asimov (1989). "The Relativity of Wrong." TheSkeptical Inquirer, 14(1), 35-44. Fall 1989.

http://chem.tufts.edu/AnswersInScience/RelativityofWrong.htm
====================================================


chainsaw <- function(tr, timepoint=10)
    {
    # Take a tree and saw it off evenly across a certain timepoint.
    # This removes any tips above the timepoint, and replaces them
    # with a single tip representing the lineage crossing
    # the timepoint (with a new tip name).

    # Get the tree in a table
    tr_table = prt(tr, printflag=FALSE)
    
    # Find the tips that are less than 10 my old and drop them
    TF_exists_more_recently_than_10mya = tr_table$time_bp < timepoint
    
    # Get the corresponding labels
    labels_for_tips_existing_more_recently_than_10mya = tr_table$label[ 
TF_exists_more_recently_than_10mya == TRUE ]
    
    ###########################################
    # Draft chainsaw function
    ###########################################
    # loop through the branches that cross 10 mya
    
    # get a list of the edge start/stops in the phylogeny's edges
    edge_times_bp = get_edge_times_before_present(tr)
    
    # which of these branches cross 10 mya?
    edges_start_earlier_than_10mya = edge_times_bp[, 1] > timepoint
    edges_end_later_than_10mya = edge_times_bp[, 2] <= timepoint
    edges_to_chainsaw = edges_start_earlier_than_10mya + 
edges_end_later_than_10mya == 2
    
    # then, for each of these edges, figure out how many tips exist descending 
from it
    nodes_to_chainsaw = tr$edge[, 2][edges_to_chainsaw]
    nodes_to_chainsaw = nodes_to_chainsaw[nodes_to_chainsaw > 
length(tr$tip.label)]
    
    # create a copy of the tree to chainsaw
    tree_to_chainsaw = tr
    
    for (i in 1:length(nodes_to_chainsaw))
        {
        tmp_subtree = extract.clade(tr, nodes_to_chainsaw[i])
        #print(tmp_subtree$tip.label)
       
        tmp_number_of_tips = length(tmp_subtree$tip.label)
        #print(tmp_number_of_tips)
       
        # number of tips to drop = (numtips -1)
        numtips_to_drop = tmp_number_of_tips - 1
       
        # tips_to_drop
        tmp_labels = tmp_subtree$tip.label
       
        labels_to_drop = tmp_labels[1:numtips_to_drop]
       
        # new label
        label_kept_num = length(tmp_labels)
        label_kept = tmp_labels[label_kept_num]
        new_label = paste("CA_", label_kept, "+", numtips_to_drop, "_tips", 
sep="")
        tree_to_chainsaw$tip.label[tree_to_chainsaw$tip.label == label_kept] = 
new_label
       
        # chop off e.g. 2 of the 3 tips
        tree_to_chainsaw = drop.tip(tree_to_chainsaw, labels_to_drop)
       
        }
    #plot(tree_to_chainsaw)
    #axisPhylo()
    
    tree_to_chainsaw_table = prt(tree_to_chainsaw, printflag=FALSE)
    
    tree_to_chainsaw_table_tips_TF_time_bp_LT_10my = 
tree_to_chainsaw_table$time_bp < timepoint
    
    
    tmp_edge_lengths = 
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
    
    times_bp_for_edges_to_chainsaw = 
tree_to_chainsaw_table$time_bp[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
    
    adjustment = times_bp_for_edges_to_chainsaw - timepoint
    
    revised_tmp_edge_lengths = tmp_edge_lengths + adjustment
    
    
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
 = revised_tmp_edge_lengths
    
    # revised
    ordered_nodenames = get_nodenums(tree_to_chainsaw)
    parent_branches = 
get_indices_where_list1_occurs_in_list2(ordered_nodenames, 
tree_to_chainsaw$edge[,2])
    
    NA_false = is.not.na(tree_to_chainsaw_table$edge.length)
    
    tree_to_chainsaw$edge.length[parent_branches[NA_false]] = 
tree_to_chainsaw_table$edge.length[NA_false]
    
    return(tree_to_chainsaw)
    }


# print tree in hierarchical format
prt <- function(t, printflag=TRUE, relabel_nodes = FALSE, time_bp_digits=7)
    {
    # assemble beginning table
    
    # check if internal node labels exist
    if ("node.label" %in% attributes(t)$names == FALSE)
        {
        rootnum = get_nodenum_structural_root(t)
       
        new_node_labels = paste("inNode", rootnum:(rootnum+t$Nnode-1), sep="")
        t$node.label = new_node_labels
        }
    
    # or manually relabel the internal nodes, if desired
    if (relabel_nodes == TRUE)
        {
        rootnum = get_nodenum_structural_root(t)
       
        new_node_labels = paste("inNode", rootnum:(rootnum+t$Nnode-1), sep="")
        t$node.label = new_node_labels
        }
    
    labels = c(t$tip.label, t$node.label)
    ordered_nodenames = get_nodenums(t)
    #nodenums = 1:length(labels)
    node.types1 = rep("tip", length(t$tip.label))
    node.types2 = rep("internal", length(t$node.label))
    node.types2[1] = "root"
    node.types = c(node.types1, node.types2)
    
    # These are the index numbers of the edges below each node
    parent_branches = 
get_indices_where_list1_occurs_in_list2(ordered_nodenames, t$edge[,2])
    #parent_edges = parent_branches
    brlen_to_parent = t$edge.length[parent_branches]
    
    parent_nodes = t$edge[,1][parent_branches]
    daughter_nodes = lapply(ordered_nodenames, get_daughters, t)
    
    # print out the structural root, if desired
    root_nodenum = get_nodenum_structural_root(t)
    tmpstr = paste("prt(t): root=", root_nodenum, "\n", sep="")
    prflag(tmpstr, printflag=printflag)
    
    levels_for_nodes = unlist(lapply(ordered_nodenames, get_level, t))
    #tmplevel = get_level(23, t)
    #print(tmplevel)
    
    
    #height above root
    hts_at_end_of_branches_aka_at_nodes = t$edge.length
    hts_at_end_of_branches_aka_at_nodes = get_all_node_ages(t)
    h = hts_at_end_of_branches_aka_at_nodes

    # times before present, below (ultrametric!) tips
    # numbers are positive, i.e. in millions of years before present
    #                       i.e. mybp, Ma
    times_before_present = get_max_height_tree(t) - h

    
    # fill in the ages of each node for the edges
    edge_ages = t$edge
    edge_ages[,1] = h[t$edge[,1]]    # bottom of branch
    edge_ages[,2] = h[t$edge[,2]]    # top of branch


    # fill in the times before present of each node for the edges
    edge_times_bp = t$edge
    edge_times_bp[,1] = times_before_present[t$edge[,1]]    # bottom of branch
    edge_times_bp[,2] = times_before_present[t$edge[,2]]    # top of branch
    
    
    
    tmpdtf = cbind(1:length(ordered_nodenames), ordered_nodenames, 
levels_for_nodes, node.types, parent_branches, brlen_to_parent, parent_nodes, 
daughter_nodes, h, round(times_before_present, digits=time_bp_digits), labels)
    
    dtf = as.data.frame(tmpdtf, row.names=NULL)
    # nd = node
    
    # edge.length is the same as brlen_2_parent
    names(dtf) = c("node", "ord_ndname", "node_lvl", "node.type", "parent_br", 
"edge.length", "ancestor", "daughter_nds", "node_ht", "time_bp", "label")
    
    # convert the cols from class "list" to some natural class
    dtf = unlist_dtf_cols(dtf, printflag=FALSE)
    
    # print if desired
    prflag(dtf, printflag=printflag)
    
    #tree_strings = c()
    #root_str = get_node_info(root_nodenum, t)
    return(dtf)
    }



get_edge_times_before_present <- function(t)
    {
    #height above root
    hts_at_end_of_branches_aka_at_nodes = t$edge.length
    hts_at_end_of_branches_aka_at_nodes = get_all_node_ages(t)
    h = hts_at_end_of_branches_aka_at_nodes

    # times before present, below (ultrametric!) tips
    # numbers are positive, i.e. in millions of years before present
    #                       i.e. mybp, Ma
    times_before_present = get_max_height_tree(t) - h

    
    # fill in the ages of each node for the edges
    edge_ages = t$edge
    edge_ages[,1] = h[t$edge[,1]]    # bottom of branch
    edge_ages[,2] = h[t$edge[,2]]    # top of branch

    # fill in the times before present of each node for the edges
    edge_times_bp = t$edge
    edge_times_bp[,1] = times_before_present[t$edge[,1]]    # bottom of branch
    edge_times_bp[,2] = times_before_present[t$edge[,2]]    # top of branch
    
    return(edge_times_bp)
    }


extract.clade <- function(phy, node, root.edge = 0, interactive = FALSE)
{
    Ntip <- length(phy$tip.label)
    ROOT <- Ntip + 1
    Nedge <- dim(phy$edge)[1]
    wbl <- !is.null(phy$edge.length)
    if (interactive) node <- identify(phy)$nodes else {
        if (length(node) > 1) {
            node <- node[1]
            warning("only the first value of 'node' has been considered")
        }
        if (is.character(node)) {
            if (is.null(phy$node.label))
                stop("the tree has no node labels")
            node <- which(phy$node.label %in% node) + Ntip
        }
        if (node <= Ntip)
            stop("node number must be greater than the number of tips")
    }
    if (node == ROOT) return(phy)
    phy <- reorder(phy) # insure it is in cladewise order
    root.node <- which(phy$edge[, 2] == node)
    start <- root.node + 1 # start of the clade looked for
    anc <- phy$edge[root.node, 1] # the ancestor of 'node'
    next.anc <- which(phy$edge[-(1:start), 1] <= anc) # find the next occurence 
of 'anc' or an 'older' node

    keep <- if (length(next.anc)) start + 0:(next.anc[1] - 1) else start:Nedge

    if (root.edge) {
        NewRootEdge <- phy$edge.length[root.node]
        root.edge <- root.edge - 1
        while (root.edge) {
            if (anc == ROOT) break
            i <- which(phy$edge[, 2] ==  anc)
            NewRootEdge <- NewRootEdge + phy$edge.length[i]
            root.edge <- root.edge - 1
            anc <- phy$edge[i, 1]
        }
        if (root.edge && !is.null(phy$root.edge))
            NewRootEdge <- NewRootEdge + phy$root.edge
        phy$root.edge <- NewRootEdge
    }

    phy$edge <- phy$edge[keep, ]
    if (wbl) phy$edge.length <- phy$edge.length[keep]
    TIPS <- phy$edge[, 2] <= Ntip
    tip <- phy$edge[TIPS, 2]
    phy$tip.label <- phy$tip.label[sort(tip)] # <- added sort to avoid 
shuffling of tip labels (2010-07-21)
    ## keep the ordering so no need to reorder tip.label:
    phy$edge[TIPS, 2] <- order(tip)
    if (!is.null(phy$node.label))
        phy$node.label <- phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
    Ntip <- length(phy$tip.label)
    phy$Nnode <- dim(phy$edge)[1] - Ntip + 1L
    ## The block below renumbers the nodes so that they conform
    ## to the "phylo" format -- same as in root()
    newNb <- integer(Ntip + phy$Nnode)
    newNb[node] <- Ntip + 1L
    sndcol <- phy$edge[, 2] > Ntip
    ## executed from right to left, so newNb is modified before phy$edge:
    phy$edge[sndcol, 2] <- newNb[phy$edge[sndcol, 2]] <-
        (Ntip + 2):(Ntip + phy$Nnode)
    phy$edge[, 1] <- newNb[phy$edge[, 1]]
    phy
}



# this returns the NUMBERS identifying each node
get_nodenums <- function(t)
    {
    # get just the unique node numbers from the edge list (left column: start 
node; right column: end node):
    nodenames = unique(c(t$edge))
    ordered_nodenames = nodenames[order(nodenames)]
    return(ordered_nodenames)
    }


get_nodenum_structural_root <- function(t, print_nodenum=FALSE)
        {
        #numnodes = length(t$tip.label) + length(t$node.label)
        #ordered_nodes = 1:length(numnodes)
        
        ordered_nodes = get_nodenums(t)

        root_nodenums_list = c()
        for (n in 1:length(ordered_nodes))
                {
                tmpnode = ordered_nodes[n]
                if (tmpnode %in% t$edge[,2])
                        {
                        blah = TRUE
                        }
                else
                        {
                        if (print_nodenum == TRUE)
                                {
                                cat("get_nodenum_structural_root(): Root 
nodenum = ", tmpnode, sep="")
                                }
                        root_nodenums_list = c(root_nodenums_list, tmpnode)
                        }
                }
        return(root_nodenums_list)
        }


# NOTE!!! THESE MATCH FUNCTIONS JUST RETURN THE *FIRST* MATCH, *NOT* ALL MATCHES
# (argh)
# return indices in 2nd list matching the first list
# It WILL return one match for each item in the list, though...
get_indices_where_list1_occurs_in_list2 <- function(list1, list2)
    {
    match_indices = match(list1, list2)
    return(match_indices)
    }


is.not.na <- function(x)
    {
    return(is.na(x) == FALSE)
    }



=========================

_______________________________________________
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo

Re: [R-sig-phylo] cut a tree in a given time interval

Reply via email to