Re: [R-sig-phylo] cut a tree in a given time interval

Nick Matzke Thu, 14 Jul 2011 17:54:01 -0700

On 7/14/11 2:45 AM, ppi...@uniroma3.it wrote:
> Thankyou NIck,
> now...I have an error when running it:
>

Oops. Apologies, this stuff is in-house code, I haven'torganized it. Add these to the text file...


===========

get_daughters <- function(nodenum, t)
        {
        daughter_edgenums = findall(nodenum, t$edge[,1])
        daughter_nodenums = t$edge[,2][daughter_edgenums]
        return(daughter_nodenums)
        }

# Get indices of all matches to a list
findall <- function(what, inlist)
        {
        TFmatches = inlist == what
        indices = 1:length(inlist)
        matching_indices = indices[TFmatches]
        return(matching_indices)
        }

===========

object "get_daughters" not found
It does not seem I miss something........
some help?
thanks again
best
paolo


Oops, left that out.  Here it is as text and an
attached file.

You will have to do
library(ape)

and maybe

library(phangorn)

...ahead of time (hopefully not others...)

============



chainsaw<- function(tr, timepoint=10)
      {
      # Take a tree and saw it off evenly across a
certain
timepoint.
      # This removes any tips above the timepoint, and
replaces them
      # with a single tip representing the lineage
crossing
      # the timepoint (with a new tip name).

      # Get the tree in a table
      tr_table = prt(tr, printflag=FALSE)

      # Find the tips that are less than 10 my old and
drop them
      TF_exists_more_recently_than_10mya =
tr_table$time_bp<
timepoint

      # Get the corresponding labels
      labels_for_tips_existing_more_recently_than_10mya
=
tr_table$label[ TF_exists_more_recently_than_10mya ==
TRUE ]

      ###########################################
      # Draft chainsaw function
      ###########################################
      # loop through the branches that cross 10 mya

      # get a list of the edge start/stops in the
phylogeny's
edges
      edge_times_bp = get_edge_times_before_present(tr)

      # which of these branches cross 10 mya?
      edges_start_earlier_than_10mya = edge_times_bp[,
1]>
timepoint
      edges_end_later_than_10mya = edge_times_bp[, 2]
<=
timepoint
      edges_to_chainsaw =
edges_start_earlier_than_10mya +
edges_end_later_than_10mya == 2

      # then, for each of these edges, figure out how
many
tips exist descending from it
      nodes_to_chainsaw = tr$edge[,
2][edges_to_chainsaw]
      nodes_to_chainsaw =
nodes_to_chainsaw[nodes_to_chainsaw
  >  length(tr$tip.label)]

      # create a copy of the tree to chainsaw
      tree_to_chainsaw = tr

      for (i in 1:length(nodes_to_chainsaw))
          {
          tmp_subtree = extract.clade(tr,
nodes_to_chainsaw[i])
          #print(tmp_subtree$tip.label)

          tmp_number_of_tips =
length(tmp_subtree$tip.label)
          #print(tmp_number_of_tips)

          # number of tips to drop = (numtips -1)
          numtips_to_drop = tmp_number_of_tips - 1

          # tips_to_drop
          tmp_labels = tmp_subtree$tip.label

          labels_to_drop =
tmp_labels[1:numtips_to_drop]

          # new label
          label_kept_num = length(tmp_labels)
          label_kept = tmp_labels[label_kept_num]
          new_label = paste("CA_", label_kept, "+",
numtips_to_drop, "_tips", sep="")

tree_to_chainsaw$tip.label[tree_to_chainsaw$tip.label
==
label_kept] = new_label

          # chop off e.g. 2 of the 3 tips
          tree_to_chainsaw = drop.tip(tree_to_chainsaw,
labels_to_drop)

          }
      #plot(tree_to_chainsaw)
      #axisPhylo()

      tree_to_chainsaw_table = prt(tree_to_chainsaw,
printflag=FALSE)

      tree_to_chainsaw_table_tips_TF_time_bp_LT_10my =
tree_to_chainsaw_table$time_bp<  timepoint


      tmp_edge_lengths =
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]

      times_bp_for_edges_to_chainsaw =
tree_to_chainsaw_table$time_bp[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]

      adjustment = times_bp_for_edges_to_chainsaw -
timepoint

      revised_tmp_edge_lengths = tmp_edge_lengths +
adjustment


tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
= revised_tmp_edge_lengths

      # revised
      ordered_nodenames =
get_nodenums(tree_to_chainsaw)
      parent_branches =
get_indices_where_list1_occurs_in_list2(ordered_nodenames,
tree_to_chainsaw$edge[,2])

      NA_false =
is.not.na(tree_to_chainsaw_table$edge.length)

      tree_to_chainsaw$edge.length[parent_branches[NA_false]]
= tree_to_chainsaw_table$edge.length[NA_false]

      return(tree_to_chainsaw)
      }


# print tree in hierarchical format
prt<- function(t, printflag=TRUE, relabel_nodes =
FALSE,
time_bp_digits=7)
      {
      # assemble beginning table

      # check if internal node labels exist
      if ("node.label" %in% attributes(t)$names ==
FALSE)
          {
          rootnum = get_nodenum_structural_root(t)

          new_node_labels = paste("inNode",
rootnum:(rootnum+t$Nnode-1), sep="")
          t$node.label = new_node_labels
          }

      # or manually relabel the internal nodes, if
desired
      if (relabel_nodes == TRUE)
          {
          rootnum = get_nodenum_structural_root(t)

          new_node_labels = paste("inNode",
rootnum:(rootnum+t$Nnode-1), sep="")
          t$node.label = new_node_labels
          }

      labels = c(t$tip.label, t$node.label)
      ordered_nodenames = get_nodenums(t)
      #nodenums = 1:length(labels)
      node.types1 = rep("tip", length(t$tip.label))
      node.types2 = rep("internal",
length(t$node.label))
      node.types2[1] = "root"
      node.types = c(node.types1, node.types2)

      # These are the index numbers of the edges below
each node
      parent_branches =
get_indices_where_list1_occurs_in_list2(ordered_nodenames,
t$edge[,2])
      #parent_edges = parent_branches
      brlen_to_parent = t$edge.length[parent_branches]

      parent_nodes = t$edge[,1][parent_branches]
      daughter_nodes = lapply(ordered_nodenames,
get_daughters, t)

      # print out the structural root, if desired
      root_nodenum = get_nodenum_structural_root(t)
      tmpstr = paste("prt(t): root=", root_nodenum,
"\n", sep="")
      prflag(tmpstr, printflag=printflag)

      levels_for_nodes =
unlist(lapply(ordered_nodenames,
get_level, t))
      #tmplevel = get_level(23, t)
      #print(tmplevel)


      #height above root
      hts_at_end_of_branches_aka_at_nodes =
t$edge.length
      hts_at_end_of_branches_aka_at_nodes =
get_all_node_ages(t)
      h = hts_at_end_of_branches_aka_at_nodes

      # times before present, below (ultrametric!) tips
      # numbers are positive, i.e. in millions of years
before present
      #                       i.e. mybp, Ma
      times_before_present = get_max_height_tree(t) - h


      # fill in the ages of each node for the edges
      edge_ages = t$edge
      edge_ages[,1] = h[t$edge[,1]]    # bottom of
branch
      edge_ages[,2] = h[t$edge[,2]]    # top of branch


      # fill in the times before present of each node
for the
edges
      edge_times_bp = t$edge
      edge_times_bp[,1] =
times_before_present[t$edge[,1]]
   # bottom of branch
      edge_times_bp[,2] =
times_before_present[t$edge[,2]]
   # top of branch



      tmpdtf = cbind(1:length(ordered_nodenames),
ordered_nodenames, levels_for_nodes, node.types,
parent_branches, brlen_to_parent, parent_nodes,
daughter_nodes, h, round(times_before_present,
digits=time_bp_digits), labels)

      dtf = as.data.frame(tmpdtf, row.names=NULL)
      # nd = node

      # edge.length is the same as brlen_2_parent
      names(dtf) = c("node", "ord_ndname", "node_lvl",
"node.type", "parent_br", "edge.length", "ancestor",
"daughter_nds", "node_ht", "time_bp", "label")

      # convert the cols from class "list" to some
natural class
      dtf = unlist_dtf_cols(dtf, printflag=FALSE)

      # print if desired
      prflag(dtf, printflag=printflag)

      #tree_strings = c()
      #root_str = get_node_info(root_nodenum, t)
      return(dtf)
      }



get_edge_times_before_present<- function(t)
      {
      #height above root
      hts_at_end_of_branches_aka_at_nodes =
t$edge.length
      hts_at_end_of_branches_aka_at_nodes =
get_all_node_ages(t)
      h = hts_at_end_of_branches_aka_at_nodes

      # times before present, below (ultrametric!) tips
      # numbers are positive, i.e. in millions of years
before present
      #                       i.e. mybp, Ma
      times_before_present = get_max_height_tree(t) - h


      # fill in the ages of each node for the edges
      edge_ages = t$edge
      edge_ages[,1] = h[t$edge[,1]]    # bottom of
branch
      edge_ages[,2] = h[t$edge[,2]]    # top of branch

      # fill in the times before present of each node
for the
edges
      edge_times_bp = t$edge
      edge_times_bp[,1] =
times_before_present[t$edge[,1]]
   # bottom of branch
      edge_times_bp[,2] =
times_before_present[t$edge[,2]]
   # top of branch

      return(edge_times_bp)
      }


extract.clade<- function(phy, node, root.edge = 0,
interactive = FALSE)
{
      Ntip<- length(phy$tip.label)
      ROOT<- Ntip + 1
      Nedge<- dim(phy$edge)[1]
      wbl<- !is.null(phy$edge.length)
      if (interactive) node<- identify(phy)$nodes else
{
          if (length(node)>  1) {
              node<- node[1]
              warning("only the first value of 'node'
has
been considered")
          }
          if (is.character(node)) {
              if (is.null(phy$node.label))
                  stop("the tree has no node labels")
              node<- which(phy$node.label %in% node) +
Ntip
          }
          if (node<= Ntip)
              stop("node number must be greater than
the
number of tips")
      }
      if (node == ROOT) return(phy)
      phy<- reorder(phy) # insure it is in cladewise
order
      root.node<- which(phy$edge[, 2] == node)
      start<- root.node + 1 # start of the clade
looked for
      anc<- phy$edge[root.node, 1] # the ancestor of
'node'
      next.anc<- which(phy$edge[-(1:start), 1]<= anc)
#
find the next occurence of 'anc' or an 'older' node

      keep<- if (length(next.anc)) start +
0:(next.anc[1] -
1) else start:Nedge

      if (root.edge) {
          NewRootEdge<- phy$edge.length[root.node]
          root.edge<- root.edge - 1
          while (root.edge) {
              if (anc == ROOT) break
              i<- which(phy$edge[, 2] ==  anc)
              NewRootEdge<- NewRootEdge +
phy$edge.length[i]
              root.edge<- root.edge - 1
              anc<- phy$edge[i, 1]
          }
          if (root.edge&&  !is.null(phy$root.edge))
              NewRootEdge<- NewRootEdge +
phy$root.edge
          phy$root.edge<- NewRootEdge
      }

      phy$edge<- phy$edge[keep, ]
      if (wbl) phy$edge.length<- phy$edge.length[keep]
      TIPS<- phy$edge[, 2]<= Ntip
      tip<- phy$edge[TIPS, 2]
      phy$tip.label<- phy$tip.label[sort(tip)] #<-
added
sort to avoid shuffling of tip labels (2010-07-21)
      ## keep the ordering so no need to reorder
tip.label:
      phy$edge[TIPS, 2]<- order(tip)
      if (!is.null(phy$node.label))
          phy$node.label<-
phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
      Ntip<- length(phy$tip.label)
      phy$Nnode<- dim(phy$edge)[1] - Ntip + 1L
      ## The block below renumbers the nodes so that
they conform
      ## to the "phylo" format -- same as in root()
      newNb<- integer(Ntip + phy$Nnode)
      newNb[node]<- Ntip + 1L
      sndcol<- phy$edge[, 2]>  Ntip
      ## executed from right to left, so newNb is
modified
before phy$edge:
      phy$edge[sndcol, 2]<- newNb[phy$edge[sndcol, 2]]
<-
          (Ntip + 2):(Ntip + phy$Nnode)
      phy$edge[, 1]<- newNb[phy$edge[, 1]]
      phy
}



# this returns the NUMBERS identifying each node
get_nodenums<- function(t)
      {
      # get just the unique node numbers from the edge
list
(left column: start node; right column: end node):
      nodenames = unique(c(t$edge))
      ordered_nodenames = nodenames[order(nodenames)]
      return(ordered_nodenames)
      }


get_nodenum_structural_root<- function(t,
print_nodenum=FALSE)
        {
        #numnodes = length(t$tip.label) +
length(t$node.label)
        #ordered_nodes = 1:length(numnodes)

        ordered_nodes = get_nodenums(t)

        root_nodenums_list = c()
        for (n in 1:length(ordered_nodes))
                {
                tmpnode = ordered_nodes[n]
                if (tmpnode %in% t$edge[,2])
                        {
                        blah = TRUE
                        }
                else
                        {
                        if (print_nodenum == TRUE)
                                {
                                cat("get_nodenum_structural_root(): Root 
nodenum =
",
tmpnode, sep="")
                                }
                        root_nodenums_list = c(root_nodenums_list, tmpnode)
                        }
                }
        return(root_nodenums_list)
        }


# NOTE!!! THESE MATCH FUNCTIONS JUST RETURN THE
*FIRST*
MATCH, *NOT* ALL MATCHES
# (argh)
# return indices in 2nd list matching the first list
# It WILL return one match for each item in the list,
though...
get_indices_where_list1_occurs_in_list2<-
function(list1,
list2)
      {
      match_indices = match(list1, list2)
      return(match_indices)
      }


is.not.na<- function(x)
      {
      return(is.na(x) == FALSE)
      }



=========================




============

On 7/13/11 4:42 AM, ppi...@uniroma3.it wrote:

Hi Nick,

I try to run but "get_nodenum_structural_root" is a
non defined function; I miss smething?

Due to my email formatting I spent time to re-adjust
commenting line that were paragraphed

could you send me the .R file source?
..together with get_nodenum_structural_root
thanks in advance

ciao
paolo












Here's chainsaw().  It also requires sourcing a few
other
functions.  Please cite me if you use it :-).

=========================

chainsaw<- function(tr, timepoint=10)
        {
        # Take a tree and saw it off evenly across a
certain
timepoint.
        # This removes any tips above the timepoint, and
replaces them
        # with a single tip representing the lineage
crossing
        # the timepoint (with a new tip name).

        # Get the tree in a table
        tr_table = prt(tr, printflag=FALSE)

        # Find the tips that are less than 10 my old and
drop
them
        TF_exists_more_recently_than_10mya =
tr_table$time_bp
<
timepoint

        # Get the corresponding labels
        labels_for_tips_existing_more_recently_than_10mya =
tr_table$label[ TF_exists_more_recently_than_10mya
==
TRUE ]

        ###########################################
        # Draft chainsaw function
        ###########################################
        # loop through the branches that cross 10 mya

        # get a list of the edge start/stops in the
phylogeny's edges
        edge_times_bp = get_edge_times_before_present(tr)

        # which of these branches cross 10 mya?
        edges_start_earlier_than_10mya = edge_times_bp[,
1]>
timepoint
        edges_end_later_than_10mya = edge_times_bp[, 2]<=
timepoint
        edges_to_chainsaw = edges_start_earlier_than_10mya
+
edges_end_later_than_10mya == 2

        # then, for each of these edges, figure out how
many
tips
exist descending from it
        nodes_to_chainsaw = tr$edge[, 2][edges_to_chainsaw]
        nodes_to_chainsaw =
nodes_to_chainsaw[nodes_to_chainsaw>
length(tr$tip.label)]

        # create a copy of the tree to chainsaw
        tree_to_chainsaw = tr

        for (i in 1:length(nodes_to_chainsaw))
                {
                tmp_subtree = extract.clade(tr,
nodes_to_chainsaw[i])
                #print(tmp_subtree$tip.label)

                tmp_number_of_tips = length(tmp_subtree$tip.label)
                #print(tmp_number_of_tips)

                # number of tips to drop = (numtips -1)
                numtips_to_drop = tmp_number_of_tips - 1

                # tips_to_drop
                tmp_labels = tmp_subtree$tip.label

                labels_to_drop = tmp_labels[1:numtips_to_drop]

                # new label
                label_kept_num = length(tmp_labels)
                label_kept = tmp_labels[label_kept_num]
                new_label = paste("CA_", label_kept, "+",
numtips_to_drop,
"_tips", sep="")
                tree_to_chainsaw$tip.label[tree_to_chainsaw$tip.label
==
label_kept] = new_label

                # chop off e.g. 2 of the 3 tips
                tree_to_chainsaw = drop.tip(tree_to_chainsaw,
labels_to_drop)

                }
        #plot(tree_to_chainsaw)
        #axisPhylo()

        tree_to_chainsaw_table = prt(tree_to_chainsaw,
printflag=FALSE)

        tree_to_chainsaw_table_tips_TF_time_bp_LT_10my =
tree_to_chainsaw_table$time_bp<   timepoint


        tmp_edge_lengths =
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]

        times_bp_for_edges_to_chainsaw =
tree_to_chainsaw_table$time_bp[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]

        adjustment = times_bp_for_edges_to_chainsaw -
timepoint

        revised_tmp_edge_lengths = tmp_edge_lengths +
adjustment

        
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
= revised_tmp_edge_lengths

        # revised
        ordered_nodenames = get_nodenums(tree_to_chainsaw)
        parent_branches =
get_indices_where_list1_occurs_in_list2(ordered_nodenames,
tree_to_chainsaw$edge[,2])

        NA_false =
is.not.na(tree_to_chainsaw_table$edge.length)

        tree_to_chainsaw$edge.length[parent_branches[NA_false]]
=
tree_to_chainsaw_table$edge.length[NA_false]

        return(tree_to_chainsaw)
        }


# print tree in hierarchical format
prt<- function(t, printflag=TRUE, relabel_nodes =
FALSE,
time_bp_digits=7)
        {
        # assemble beginning table

        # check if internal node labels exist
        if ("node.label" %in% attributes(t)$names == FALSE)
                {
                rootnum = get_nodenum_structural_root(t)

                new_node_labels = paste("inNode",
rootnum:(rootnum+t$Nnode-1), sep="")
                t$node.label = new_node_labels
                }

        # or manually relabel the internal nodes, if
desired
        if (relabel_nodes == TRUE)
                {
                rootnum = get_nodenum_structural_root(t)

                new_node_labels = paste("inNode",
rootnum:(rootnum+t$Nnode-1), sep="")
                t$node.label = new_node_labels
                }

        labels = c(t$tip.label, t$node.label)
        ordered_nodenames = get_nodenums(t)
        #nodenums = 1:length(labels)
        node.types1 = rep("tip", length(t$tip.label))
        node.types2 = rep("internal", length(t$node.label))
        node.types2[1] = "root"
        node.types = c(node.types1, node.types2)

        # These are the index numbers of the edges below
each
node
        parent_branches =
get_indices_where_list1_occurs_in_list2(ordered_nodenames,
t$edge[,2])
        #parent_edges = parent_branches
        brlen_to_parent = t$edge.length[parent_branches]

        parent_nodes = t$edge[,1][parent_branches]
        daughter_nodes = lapply(ordered_nodenames,
get_daughters, t)

        # print out the structural root, if desired
        root_nodenum = get_nodenum_structural_root(t)
        tmpstr = paste("prt(t): root=", root_nodenum, "\n",
sep="")
        prflag(tmpstr, printflag=printflag)

        levels_for_nodes = unlist(lapply(ordered_nodenames,
get_level, t))
        #tmplevel = get_level(23, t)
        #print(tmplevel)


        #height above root
        hts_at_end_of_branches_aka_at_nodes = t$edge.length
        hts_at_end_of_branches_aka_at_nodes =
get_all_node_ages(t)
        h = hts_at_end_of_branches_aka_at_nodes

        # times before present, below (ultrametric!) tips
        # numbers are positive, i.e. in millions of years
before
present
        #                       i.e. mybp, Ma
        times_before_present = get_max_height_tree(t) - h


        # fill in the ages of each node for the edges
        edge_ages = t$edge
        edge_ages[,1] = h[t$edge[,1]]   # bottom of branch
        edge_ages[,2] = h[t$edge[,2]]   # top of branch


        # fill in the times before present of each node for
the edges
        edge_times_bp = t$edge
        edge_times_bp[,1] =
times_before_present[t$edge[,1]]        #
bottom of branch
        edge_times_bp[,2] =
times_before_present[t$edge[,2]]        # top
of branch



        tmpdtf = cbind(1:length(ordered_nodenames),
ordered_nodenames, levels_for_nodes, node.types,
parent_branches, brlen_to_parent, parent_nodes,
daughter_nodes, h, round(times_before_present,
digits=time_bp_digits), labels)

        dtf = as.data.frame(tmpdtf, row.names=NULL)
        # nd = node

        # edge.length is the same as brlen_2_parent
        names(dtf) = c("node", "ord_ndname", "node_lvl",
"node.type", "parent_br", "edge.length", "ancestor",
"daughter_nds", "node_ht", "time_bp", "label")

        # convert the cols from class "list" to some
natural
class
        dtf = unlist_dtf_cols(dtf, printflag=FALSE)

        # print if desired
        prflag(dtf, printflag=printflag)

        #tree_strings = c()
        #root_str = get_node_info(root_nodenum, t)
        return(dtf)
        }



get_edge_times_before_present<- function(t)
        {
        #height above root
        hts_at_end_of_branches_aka_at_nodes = t$edge.length
        hts_at_end_of_branches_aka_at_nodes =
get_all_node_ages(t)
        h = hts_at_end_of_branches_aka_at_nodes

        # times before present, below (ultrametric!) tips
        # numbers are positive, i.e. in millions of years
before
present
        #                       i.e. mybp, Ma
        times_before_present = get_max_height_tree(t) - h


        # fill in the ages of each node for the edges
        edge_ages = t$edge
        edge_ages[,1] = h[t$edge[,1]]   # bottom of branch
        edge_ages[,2] = h[t$edge[,2]]   # top of branch

        # fill in the times before present of each node for
the edges
        edge_times_bp = t$edge
        edge_times_bp[,1] =
times_before_present[t$edge[,1]]        #
bottom of branch
        edge_times_bp[,2] =
times_before_present[t$edge[,2]]        # top
of branch

        return(edge_times_bp)
        }


extract.clade<- function(phy, node, root.edge = 0,
interactive = FALSE)
{
       Ntip<- length(phy$tip.label)
       ROOT<- Ntip + 1
       Nedge<- dim(phy$edge)[1]
       wbl<- !is.null(phy$edge.length)
       if (interactive) node<- identify(phy)$nodes
else
{
           if (length(node)>   1) {
               node<- node[1]
               warning("only the first value of
'node'
has
been considered")
           }
           if (is.character(node)) {
               if (is.null(phy$node.label))
                   stop("the tree has no node
labels")
               node<- which(phy$node.label %in% node)
+
Ntip
           }
           if (node<= Ntip)
               stop("node number must be greater than
the
number of tips")
       }
       if (node == ROOT) return(phy)
       phy<- reorder(phy) # insure it is in cladewise
order
       root.node<- which(phy$edge[, 2] == node)
       start<- root.node + 1 # start of the clade
looked for
       anc<- phy$edge[root.node, 1] # the ancestor of
'node'
       next.anc<- which(phy$edge[-(1:start), 1]<=
anc)
#
find the next occurence of 'anc' or an 'older' node

       keep<- if (length(next.anc)) start +
0:(next.anc[1] -
1) else start:Nedge

       if (root.edge) {
           NewRootEdge<- phy$edge.length[root.node]
           root.edge<- root.edge - 1
           while (root.edge) {
               if (anc == ROOT) break
               i<- which(phy$edge[, 2] ==  anc)
               NewRootEdge<- NewRootEdge +
phy$edge.length[i]
               root.edge<- root.edge - 1
               anc<- phy$edge[i, 1]
           }
           if (root.edge&&   !is.null(phy$root.edge))
               NewRootEdge<- NewRootEdge +
phy$root.edge
           phy$root.edge<- NewRootEdge
       }

       phy$edge<- phy$edge[keep, ]
       if (wbl) phy$edge.length<-
phy$edge.length[keep]
       TIPS<- phy$edge[, 2]<= Ntip
       tip<- phy$edge[TIPS, 2]
       phy$tip.label<- phy$tip.label[sort(tip)] #<-
added
sort to avoid shuffling of tip labels (2010-07-21)
       ## keep the ordering so no need to reorder
tip.label:
       phy$edge[TIPS, 2]<- order(tip)
       if (!is.null(phy$node.label))
           phy$node.label<-
phy$node.label[sort(unique(phy$edge[, 1])) - Ntip]
       Ntip<- length(phy$tip.label)
       phy$Nnode<- dim(phy$edge)[1] - Ntip + 1L
       ## The block below renumbers the nodes so that
they conform
       ## to the "phylo" format -- same as in root()
       newNb<- integer(Ntip + phy$Nnode)
       newNb[node]<- Ntip + 1L
       sndcol<- phy$edge[, 2]>   Ntip
       ## executed from right to left, so newNb is
modified
before phy$edge:
       phy$edge[sndcol, 2]<- newNb[phy$edge[sndcol,
2]]
<-
           (Ntip + 2):(Ntip + phy$Nnode)
       phy$edge[, 1]<- newNb[phy$edge[, 1]]
       phy
}



# this returns the NUMBERS identifying each node
get_nodenums<- function(t)
        {
        # get just the unique node numbers from the edge
list
(left
column: start node; right column: end node):
        nodenames = unique(c(t$edge))
        ordered_nodenames = nodenames[order(nodenames)]
        return(ordered_nodenames)
        }

# NOTE!!! THESE MATCH FUNCTIONS JUST RETURN THE
*FIRST*
MATCH, *NOT* ALL MATCHES
# (argh)
# return indices in 2nd list matching the first list
# It WILL return one match for each item in the
list,
though...
get_indices_where_list1_occurs_in_list2<-
function(list1,
list2)
        {
        match_indices = match(list1, list2)
        return(match_indices)
        }


is.not.na<- function(x)
        {
        return(is.na(x) == FALSE)
        }



=========================



On 7/12/11 10:06 PM, David Bapst wrote:

Do you want a function that gives you a tree of
taxa
observed in a time bin
(like, in the fossil record, as in Ruta et al.,
2007)? Or gives you the tree
of the lineages extant at point in time based on
some global phylogeny? If
that is what you want, the following function I
wrote for doing time-slices
on trees should do that. My most recent version
(not
posted) has some
additions so that the same time-slice can be taken
across multiple trees
that may have the root in different place. (These
modifications use the
earliest fossil species as an anchor time.)

This version of the code seems to work okay, for
example try:
tree1<-rtree(500)
timeslice.phy(tree1,slice.time=4,plot=T)

Let me know if this works alright,
-Dave, UChicago


timeslice.phy<-function(ttree,slice.time,plot=T){
       #take a phylogeny and produce a phylogenetic
'slice' at time X
           #makes an ultrametric tree, as if made of
lineages extant at time X
           #where X is a point in time after root
time
= 0
       require(ape)
       tslice<-slice.time #time from root to slice
time
       #first let's drop all edges that branch later
than the slice
       #make them all single lineages by dropping
all
but one taxon
       dnode<-dist.nodes(ttree)[,Ntip(ttree)+1]
       #identify the ancestor nodes of edges which
cross the tslice
       cedge<-which(sapply(1:Nedge(ttree),function(x)
any(ttree$edge[x,1]==which(dnode<tslice))
               &
any(ttree$edge[x,2]==which(dnode>=tslice))))
       droppers<-numeric()
       for(i in 1:length(cedge)){
           desc<-ttree$edge[cedge[i],2]
           if(desc>Ntip(ttree)){    #if an internal
edge that goes past the
tslice
               desctip<-unlist(prop.part(ttree)[desc-Ntip(ttree)])
     #drop all
but one tip
               droppers<-c(droppers,desctip[-1])
           }}
       stree<-drop.tip(ttree,droppers)
       #which edges cross over tslice?
       dnode<-dist.nodes(stree)[,Ntip(stree)+1]
       cedge<-sapply(1:Nedge(stree),function(x)
any(stree$edge[x,2]==which(dnode>=tslice)))
       cnode_depth<-dnode[stree$edge[cedge,1]]
       stree$edge.length[cedge]<-tslice-cnode_depth
       #last, let's drop all terminal taxa that are
less than the tslice
       dnode<-dist.nodes(stree)[,Ntip(stree)+1]
       droppers<-which((dnode[1:Ntip(stree)]+0.1)<tslice)
       stree<-drop.tip(stree,droppers)

if(plot){layout(matrix(1:2,,2));plot(ladderize(ttree),show.tip.label=F);axisPhylo();plot(stree,show.tip.label=F)}
       stree
       }

On Tue, Jul 12, 2011 at 3:19
PM,<ppi...@uniroma3.it>
   wrote:

Hi all,
someone knows a smart coding to cut a tree in
order
to
retain only taxa present in a given time interval
bin?

Thanks in advance for any suggestion
best
paolo

_______________________________________________
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo


--
====================================================
Nicholas J. Matzke
Ph.D. Candidate, Graduate Student Researcher

Huelsenbeck Lab
Center for Theoretical Evolutionary Genomics
4151 VLSB (Valley Life Sciences Building)
Department of Integrative Biology
University of California, Berkeley

Graduate Student Instructor, IB200B
Principles of Phylogenetics: Ecology and Evolution
http://ib.berkeley.edu/courses/ib200b/
http://phylo.wikidot.com/


Lab websites:
http://ib.berkeley.edu/people/lab_detail.php?lab=54
http://fisher.berkeley.edu/cteg/hlab.html
Dept. personal page:
http://ib.berkeley.edu/people/students/person_detail.php?person=370
Lab personal page:
http://fisher.berkeley.edu/cteg/members/matzke.html
Lab phone: 510-643-6299
Dept. fax: 510-643-6264

Cell phone: 510-301-0179
Email: mat...@berkeley.edu

Mailing address:
Department of Integrative Biology
3060 VLSB #3140
Berkeley, CA 94720-3140

-----------------------------------------------------
"[W]hen people thought the earth was flat, they were
wrong.
When people thought the earth was spherical, they
were
wrong. But if you think that thinking the earth is
spherical
is just as wrong as thinking the earth is flat, then
your
view is wronger than both of them put together."

Isaac Asimov (1989). "The Relativity of Wrong." The
Skeptical Inquirer, 14(1), 35-44. Fall 1989.
http://chem.tufts.edu/AnswersInScience/RelativityofWrong.htm

_______________________________________________
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo


--
====================================================
Nicholas J. Matzke
Ph.D. Candidate, Graduate Student Researcher

Huelsenbeck Lab
Center for Theoretical Evolutionary Genomics
4151 VLSB (Valley Life Sciences Building)
Department of Integrative Biology
University of California, Berkeley

Graduate Student Instructor, IB200B
Principles of Phylogenetics: Ecology and Evolution
http://ib.berkeley.edu/courses/ib200b/
http://phylo.wikidot.com/


Lab websites:
http://ib.berkeley.edu/people/lab_detail.php?lab=54
http://fisher.berkeley.edu/cteg/hlab.html
Dept. personal page:
http://ib.berkeley.edu/people/students/person_detail.php?person=370
Lab personal page:
http://fisher.berkeley.edu/cteg/members/matzke.html
Lab phone: 510-643-6299
Dept. fax: 510-643-6264

Cell phone: 510-301-0179
Email: mat...@berkeley.edu

Mailing address:
Department of Integrative Biology
3060 VLSB #3140
Berkeley, CA 94720-3140

-----------------------------------------------------
"[W]hen people thought the earth was flat, they were
wrong.
When people thought the earth was spherical, they were
wrong. But if you think that thinking the earth is
spherical
is just as wrong as thinking the earth is flat, then
your
view is wronger than both of them put together."

Isaac Asimov (1989). "The Relativity of Wrong." The
Skeptical Inquirer, 14(1), 35-44. Fall 1989.
http://chem.tufts.edu/AnswersInScience/RelativityofWrong.htm
====================================================


--
====================================================
Nicholas J. Matzke
Ph.D. Candidate, Graduate Student Researcher

Huelsenbeck Lab
Center for Theoretical Evolutionary Genomics
4151 VLSB (Valley Life Sciences Building)
Department of Integrative Biology
University of California, Berkeley

Graduate Student Instructor, IB200B
Principles of Phylogenetics: Ecology and Evolution
http://ib.berkeley.edu/courses/ib200b/
http://phylo.wikidot.com/


Lab websites:
http://ib.berkeley.edu/people/lab_detail.php?lab=54
http://fisher.berkeley.edu/cteg/hlab.html

Dept. personal page:http://ib.berkeley.edu/people/students/person_detail.php?person=370Lab personal page:http://fisher.berkeley.edu/cteg/members/matzke.html

Lab phone: 510-643-6299
Dept. fax: 510-643-6264

Cell phone: 510-301-0179
Email: mat...@berkeley.edu

Mailing address:
Department of Integrative Biology
3060 VLSB #3140
Berkeley, CA 94720-3140

-----------------------------------------------------

"[W]hen people thought the earth was flat, they were wrong.When people thought the earth was spherical, they werewrong. But if you think that thinking the earth is sphericalis just as wrong as thinking the earth is flat, then yourview is wronger than both of them put together."

Isaac Asimov (1989). "The Relativity of Wrong." TheSkeptical Inquirer, 14(1), 35-44. Fall 1989.

http://chem.tufts.edu/AnswersInScience/RelativityofWrong.htm

_______________________________________________
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo

Re: [R-sig-phylo] cut a tree in a given time interval

Reply via email to