Re: [R-sig-phylo] cut a tree in a given time interval

2011-07-14 Thread Nick Matzke

On 7/14/11 2:45 AM, ppi...@uniroma3.it wrote:
 Thankyou NIck,
 now...I have an error when running it:




Oops. Apologies, this stuff is in-house code, I haven't 
organized it. Add these to the text file...


===

get_daughters - function(nodenum, t)
{
daughter_edgenums = findall(nodenum, t$edge[,1])
daughter_nodenums = t$edge[,2][daughter_edgenums]
return(daughter_nodenums)
}

# Get indices of all matches to a list
findall - function(what, inlist)
{
TFmatches = inlist == what
indices = 1:length(inlist)
matching_indices = indices[TFmatches]
return(matching_indices)
}

===





object get_daughters not found
It does not seem I miss something
some help?
thanks again
best
paolo


Oops, left that out.  Here it is as text and an
attached file.

You will have to do
library(ape)

and maybe

library(phangorn)

...ahead of time (hopefully not others...)





chainsaw- function(tr, timepoint=10)
  {
  # Take a tree and saw it off evenly across a
certain
timepoint.
  # This removes any tips above the timepoint, and
replaces them
  # with a single tip representing the lineage
crossing
  # the timepoint (with a new tip name).

  # Get the tree in a table
  tr_table = prt(tr, printflag=FALSE)

  # Find the tips that are less than 10 my old and
drop them
  TF_exists_more_recently_than_10mya =
tr_table$time_bp
timepoint

  # Get the corresponding labels
  labels_for_tips_existing_more_recently_than_10mya
=
tr_table$label[ TF_exists_more_recently_than_10mya ==
TRUE ]

  ###
  # Draft chainsaw function
  ###
  # loop through the branches that cross 10 mya

  # get a list of the edge start/stops in the
phylogeny's
edges
  edge_times_bp = get_edge_times_before_present(tr)

  # which of these branches cross 10 mya?
  edges_start_earlier_than_10mya = edge_times_bp[,
1]
timepoint
  edges_end_later_than_10mya = edge_times_bp[, 2]
=
timepoint
  edges_to_chainsaw =
edges_start_earlier_than_10mya +
edges_end_later_than_10mya == 2

  # then, for each of these edges, figure out how
many
tips exist descending from it
  nodes_to_chainsaw = tr$edge[,
2][edges_to_chainsaw]
  nodes_to_chainsaw =
nodes_to_chainsaw[nodes_to_chainsaw
length(tr$tip.label)]

  # create a copy of the tree to chainsaw
  tree_to_chainsaw = tr

  for (i in 1:length(nodes_to_chainsaw))
  {
  tmp_subtree = extract.clade(tr,
nodes_to_chainsaw[i])
  #print(tmp_subtree$tip.label)

  tmp_number_of_tips =
length(tmp_subtree$tip.label)
  #print(tmp_number_of_tips)

  # number of tips to drop = (numtips -1)
  numtips_to_drop = tmp_number_of_tips - 1

  # tips_to_drop
  tmp_labels = tmp_subtree$tip.label

  labels_to_drop =
tmp_labels[1:numtips_to_drop]

  # new label
  label_kept_num = length(tmp_labels)
  label_kept = tmp_labels[label_kept_num]
  new_label = paste(CA_, label_kept, +,
numtips_to_drop, _tips, sep=)

tree_to_chainsaw$tip.label[tree_to_chainsaw$tip.label
==
label_kept] = new_label

  # chop off e.g. 2 of the 3 tips
  tree_to_chainsaw = drop.tip(tree_to_chainsaw,
labels_to_drop)

  }
  #plot(tree_to_chainsaw)
  #axisPhylo()

  tree_to_chainsaw_table = prt(tree_to_chainsaw,
printflag=FALSE)

  tree_to_chainsaw_table_tips_TF_time_bp_LT_10my =
tree_to_chainsaw_table$time_bp  timepoint


  tmp_edge_lengths =
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]

  times_bp_for_edges_to_chainsaw =
tree_to_chainsaw_table$time_bp[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]

  adjustment = times_bp_for_edges_to_chainsaw -
timepoint

  revised_tmp_edge_lengths = tmp_edge_lengths +
adjustment


tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
= revised_tmp_edge_lengths

  # revised
  ordered_nodenames =
get_nodenums(tree_to_chainsaw)
  parent_branches =
get_indices_where_list1_occurs_in_list2(ordered_nodenames,
tree_to_chainsaw$edge[,2])

  NA_false =
is.not.na(tree_to_chainsaw_table$edge.length)

  tree_to_chainsaw$edge.length[parent_branches[NA_false]]
= tree_to_chainsaw_table$edge.length[NA_false]

  return(tree_to_chainsaw)
  }


# print tree in hierarchical format
prt- function(t, printflag=TRUE, relabel_nodes =
FALSE,
time_bp_digits=7)
  {
  # assemble beginning table

  # check if internal node labels exist
  if (node.label %in% attributes(t)$names ==
FALSE)
  {
  rootnum = get_nodenum_structural_root(t)

  new_node_labels = paste(inNode,
rootnum:(rootnum+t$Nnode-1), sep=)
  t$node.label = new_node_labels
  }

  # or manually relabel 

Re: [R-sig-phylo] cut a tree in a given time interval

2011-07-13 Thread Nick Matzke

Oops, left that out.  Here it is as text and an attached file.

You will have to do
library(ape)

and maybe

library(phangorn)

...ahead of time (hopefully not others...)





chainsaw - function(tr, timepoint=10)
{
# Take a tree and saw it off evenly across a certain 
timepoint.
# This removes any tips above the timepoint, and 
replaces them

# with a single tip representing the lineage crossing
# the timepoint (with a new tip name).

# Get the tree in a table
tr_table = prt(tr, printflag=FALSE)

# Find the tips that are less than 10 my old and drop them
TF_exists_more_recently_than_10mya = tr_table$time_bp  
timepoint


# Get the corresponding labels
labels_for_tips_existing_more_recently_than_10mya = 
tr_table$label[ TF_exists_more_recently_than_10mya == TRUE ]


###
# Draft chainsaw function
###
# loop through the branches that cross 10 mya

# get a list of the edge start/stops in the phylogeny's 
edges

edge_times_bp = get_edge_times_before_present(tr)

# which of these branches cross 10 mya?
edges_start_earlier_than_10mya = edge_times_bp[, 1]  
timepoint
edges_end_later_than_10mya = edge_times_bp[, 2] = 
timepoint
edges_to_chainsaw = edges_start_earlier_than_10mya + 
edges_end_later_than_10mya == 2


# then, for each of these edges, figure out how many 
tips exist descending from it

nodes_to_chainsaw = tr$edge[, 2][edges_to_chainsaw]
nodes_to_chainsaw = nodes_to_chainsaw[nodes_to_chainsaw 
 length(tr$tip.label)]


# create a copy of the tree to chainsaw
tree_to_chainsaw = tr

for (i in 1:length(nodes_to_chainsaw))
{
tmp_subtree = extract.clade(tr, nodes_to_chainsaw[i])
#print(tmp_subtree$tip.label)

tmp_number_of_tips = length(tmp_subtree$tip.label)
#print(tmp_number_of_tips)

# number of tips to drop = (numtips -1)
numtips_to_drop = tmp_number_of_tips - 1

# tips_to_drop
tmp_labels = tmp_subtree$tip.label

labels_to_drop = tmp_labels[1:numtips_to_drop]

# new label
label_kept_num = length(tmp_labels)
label_kept = tmp_labels[label_kept_num]
new_label = paste(CA_, label_kept, +, 
numtips_to_drop, _tips, sep=)


tree_to_chainsaw$tip.label[tree_to_chainsaw$tip.label == 
label_kept] = new_label


# chop off e.g. 2 of the 3 tips
tree_to_chainsaw = drop.tip(tree_to_chainsaw, 
labels_to_drop)


}
#plot(tree_to_chainsaw)
#axisPhylo()

tree_to_chainsaw_table = prt(tree_to_chainsaw, 
printflag=FALSE)


tree_to_chainsaw_table_tips_TF_time_bp_LT_10my = 
tree_to_chainsaw_table$time_bp  timepoint



tmp_edge_lengths = 
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]


times_bp_for_edges_to_chainsaw = 
tree_to_chainsaw_table$time_bp[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]


adjustment = times_bp_for_edges_to_chainsaw - timepoint

revised_tmp_edge_lengths = tmp_edge_lengths + adjustment


tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my] 
= revised_tmp_edge_lengths


# revised
ordered_nodenames = get_nodenums(tree_to_chainsaw)
parent_branches = 
get_indices_where_list1_occurs_in_list2(ordered_nodenames, 
tree_to_chainsaw$edge[,2])


NA_false = is.not.na(tree_to_chainsaw_table$edge.length)

tree_to_chainsaw$edge.length[parent_branches[NA_false]] 
= tree_to_chainsaw_table$edge.length[NA_false]


return(tree_to_chainsaw)
}


# print tree in hierarchical format
prt - function(t, printflag=TRUE, relabel_nodes = FALSE, 
time_bp_digits=7)

{
# assemble beginning table

# check if internal node labels exist
if (node.label %in% attributes(t)$names == FALSE)
{
rootnum = get_nodenum_structural_root(t)

new_node_labels = paste(inNode, 
rootnum:(rootnum+t$Nnode-1), sep=)

t$node.label = new_node_labels
}

# or manually relabel the internal nodes, if desired
if (relabel_nodes == TRUE)
{
rootnum = get_nodenum_structural_root(t)

new_node_labels = paste(inNode, 
rootnum:(rootnum+t$Nnode-1), sep=)

t$node.label = new_node_labels
}

labels = c(t$tip.label, t$node.label)
ordered_nodenames = get_nodenums(t)
#nodenums = 1:length(labels)
node.types1 = rep(tip, length(t$tip.label))
node.types2 = rep(internal, length(t$node.label))
node.types2[1] = root
node.types = c(node.types1, node.types2)

# These are the index numbers of the edges below each node
parent_branches = 
get_indices_where_list1_occurs_in_list2(ordered_nodenames, 
t$edge[,2])

#parent_edges = parent_branches
brlen_to_parent = t$edge.length[parent_branches]

parent_nodes = t$edge[,1][parent_branches]
daughter_nodes = lapply(ordered_nodenames, 

[R-sig-phylo] cut a tree in a given time interval

2011-07-12 Thread ppiras
Hi all,
someone knows a smart coding to cut a tree in order to
retain only taxa present in a given time interval bin?

Thanks in advance for any suggestion
best
paolo

___
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo


Re: [R-sig-phylo] cut a tree in a given time interval

2011-07-12 Thread Nick Matzke
I wrote a function called chainsaw to do something like 
this, it saws off all the branches at a particular time 
point, then you just have to drop.tip on branch tips older 
than your time period.  Would that be helpful?


On 7/12/11 3:19 PM, ppi...@uniroma3.it wrote:

Hi all,
someone knows a smart coding to cut a tree in order to
retain only taxa present in a given time interval bin?

Thanks in advance for any suggestion
best
paolo

___
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo



--

Nicholas J. Matzke
Ph.D. Candidate, Graduate Student Researcher

Huelsenbeck Lab
Center for Theoretical Evolutionary Genomics
4151 VLSB (Valley Life Sciences Building)
Department of Integrative Biology
University of California, Berkeley

Graduate Student Instructor, IB200B
Principles of Phylogenetics: Ecology and Evolution
http://ib.berkeley.edu/courses/ib200b/
http://phylo.wikidot.com/


Lab websites:
http://ib.berkeley.edu/people/lab_detail.php?lab=54
http://fisher.berkeley.edu/cteg/hlab.html
Dept. personal page: 
http://ib.berkeley.edu/people/students/person_detail.php?person=370
Lab personal page: 
http://fisher.berkeley.edu/cteg/members/matzke.html

Lab phone: 510-643-6299
Dept. fax: 510-643-6264

Cell phone: 510-301-0179
Email: mat...@berkeley.edu

Mailing address:
Department of Integrative Biology
3060 VLSB #3140
Berkeley, CA 94720-3140

-
[W]hen people thought the earth was flat, they were wrong. 
When people thought the earth was spherical, they were 
wrong. But if you think that thinking the earth is spherical 
is just as wrong as thinking the earth is flat, then your 
view is wronger than both of them put together.


Isaac Asimov (1989). The Relativity of Wrong. The 
Skeptical Inquirer, 14(1), 35-44. Fall 1989.

http://chem.tufts.edu/AnswersInScience/RelativityofWrong.htm

___
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo


Re: [R-sig-phylo] cut a tree in a given time interval

2011-07-12 Thread David Bapst
Do you want a function that gives you a tree of taxa observed in a time bin
(like, in the fossil record, as in Ruta et al., 2007)? Or gives you the tree
of the lineages extant at point in time based on some global phylogeny? If
that is what you want, the following function I wrote for doing time-slices
on trees should do that. My most recent version (not posted) has some
additions so that the same time-slice can be taken across multiple trees
that may have the root in different place. (These modifications use the
earliest fossil species as an anchor time.)

This version of the code seems to work okay, for example try:
tree1-rtree(500)
timeslice.phy(tree1,slice.time=4,plot=T)

Let me know if this works alright,
-Dave, UChicago


timeslice.phy-function(ttree,slice.time,plot=T){
#take a phylogeny and produce a phylogenetic 'slice' at time X
#makes an ultrametric tree, as if made of lineages extant at time X
#where X is a point in time after root time = 0
require(ape)
tslice-slice.time #time from root to slice time
#first let's drop all edges that branch later than the slice
#make them all single lineages by dropping all but one taxon
dnode-dist.nodes(ttree)[,Ntip(ttree)+1]
#identify the ancestor nodes of edges which cross the tslice
cedge-which(sapply(1:Nedge(ttree),function(x)
any(ttree$edge[x,1]==which(dnodetslice))
 any(ttree$edge[x,2]==which(dnode=tslice
droppers-numeric()
for(i in 1:length(cedge)){
desc-ttree$edge[cedge[i],2]
if(descNtip(ttree)){#if an internal edge that goes past the
tslice
desctip-unlist(prop.part(ttree)[desc-Ntip(ttree)])#drop all
but one tip
droppers-c(droppers,desctip[-1])
}}
stree-drop.tip(ttree,droppers)
#which edges cross over tslice?
dnode-dist.nodes(stree)[,Ntip(stree)+1]
cedge-sapply(1:Nedge(stree),function(x)
any(stree$edge[x,2]==which(dnode=tslice)))
cnode_depth-dnode[stree$edge[cedge,1]]
stree$edge.length[cedge]-tslice-cnode_depth
#last, let's drop all terminal taxa that are less than the tslice
dnode-dist.nodes(stree)[,Ntip(stree)+1]
droppers-which((dnode[1:Ntip(stree)]+0.1)tslice)
stree-drop.tip(stree,droppers)

if(plot){layout(matrix(1:2,,2));plot(ladderize(ttree),show.tip.label=F);axisPhylo();plot(stree,show.tip.label=F)}
stree
}

On Tue, Jul 12, 2011 at 3:19 PM, ppi...@uniroma3.it wrote:

 Hi all,
 someone knows a smart coding to cut a tree in order to
 retain only taxa present in a given time interval bin?

 Thanks in advance for any suggestion
 best
 paolo

 ___
 R-sig-phylo mailing list
 R-sig-phylo@r-project.org
 https://stat.ethz.ch/mailman/listinfo/r-sig-phylo




-- 
David Bapst
Dept of Geophysical Sciences
University of Chicago
5734 S. Ellis
Chicago, IL 60637
http://home.uchicago.edu/~dwbapst/

[[alternative HTML version deleted]]

___
R-sig-phylo mailing list
R-sig-phylo@r-project.org
https://stat.ethz.ch/mailman/listinfo/r-sig-phylo


Re: [R-sig-phylo] cut a tree in a given time interval

2011-07-12 Thread Nick Matzke


Here's chainsaw().  It also requires sourcing a few other 
functions.  Please cite me if you use it :-).


=

chainsaw - function(tr, timepoint=10)
{
# Take a tree and saw it off evenly across a certain timepoint.
# This removes any tips above the timepoint, and replaces them
# with a single tip representing the lineage crossing
# the timepoint (with a new tip name).

# Get the tree in a table
tr_table = prt(tr, printflag=FALSE)

# Find the tips that are less than 10 my old and drop them
	TF_exists_more_recently_than_10mya = tr_table$time_bp  
timepoint


# Get the corresponding labels
	labels_for_tips_existing_more_recently_than_10mya = 
tr_table$label[ TF_exists_more_recently_than_10mya == TRUE ]


###
# Draft chainsaw function
###
# loop through the branches that cross 10 mya

# get a list of the edge start/stops in the phylogeny's edges
edge_times_bp = get_edge_times_before_present(tr)

# which of these branches cross 10 mya?
edges_start_earlier_than_10mya = edge_times_bp[, 1]  timepoint
edges_end_later_than_10mya = edge_times_bp[, 2] = timepoint
	edges_to_chainsaw = edges_start_earlier_than_10mya + 
edges_end_later_than_10mya == 2


	# then, for each of these edges, figure out how many tips 
exist descending from it

nodes_to_chainsaw = tr$edge[, 2][edges_to_chainsaw]
	nodes_to_chainsaw = nodes_to_chainsaw[nodes_to_chainsaw  
length(tr$tip.label)]


# create a copy of the tree to chainsaw
tree_to_chainsaw = tr

for (i in 1:length(nodes_to_chainsaw))
{
tmp_subtree = extract.clade(tr, nodes_to_chainsaw[i])
#print(tmp_subtree$tip.label)

tmp_number_of_tips = length(tmp_subtree$tip.label)
#print(tmp_number_of_tips)

# number of tips to drop = (numtips -1)
numtips_to_drop = tmp_number_of_tips - 1

# tips_to_drop
tmp_labels = tmp_subtree$tip.label

labels_to_drop = tmp_labels[1:numtips_to_drop]

# new label
label_kept_num = length(tmp_labels)
label_kept = tmp_labels[label_kept_num]
		new_label = paste(CA_, label_kept, +, numtips_to_drop, 
_tips, sep=)
		tree_to_chainsaw$tip.label[tree_to_chainsaw$tip.label == 
label_kept] = new_label


# chop off e.g. 2 of the 3 tips
tree_to_chainsaw = drop.tip(tree_to_chainsaw, labels_to_drop)

}
#plot(tree_to_chainsaw)
#axisPhylo()

tree_to_chainsaw_table = prt(tree_to_chainsaw, printflag=FALSE)

	tree_to_chainsaw_table_tips_TF_time_bp_LT_10my = 
tree_to_chainsaw_table$time_bp  timepoint



	tmp_edge_lengths = 
tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]


	times_bp_for_edges_to_chainsaw = 
tree_to_chainsaw_table$time_bp[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]


adjustment = times_bp_for_edges_to_chainsaw - timepoint

revised_tmp_edge_lengths = tmp_edge_lengths + adjustment


tree_to_chainsaw_table$edge.length[tree_to_chainsaw_table_tips_TF_time_bp_LT_10my]
 = revised_tmp_edge_lengths

# revised
ordered_nodenames = get_nodenums(tree_to_chainsaw)
	parent_branches = 
get_indices_where_list1_occurs_in_list2(ordered_nodenames, 
tree_to_chainsaw$edge[,2])


NA_false = is.not.na(tree_to_chainsaw_table$edge.length)

	tree_to_chainsaw$edge.length[parent_branches[NA_false]] = 
tree_to_chainsaw_table$edge.length[NA_false]


return(tree_to_chainsaw)
}


# print tree in hierarchical format
prt - function(t, printflag=TRUE, relabel_nodes = FALSE, 
time_bp_digits=7)

{
# assemble beginning table

# check if internal node labels exist
if (node.label %in% attributes(t)$names == FALSE)
{
rootnum = get_nodenum_structural_root(t)

		new_node_labels = paste(inNode, 
rootnum:(rootnum+t$Nnode-1), sep=)

t$node.label = new_node_labels
}

# or manually relabel the internal nodes, if desired
if (relabel_nodes == TRUE)
{
rootnum = get_nodenum_structural_root(t)

		new_node_labels = paste(inNode, 
rootnum:(rootnum+t$Nnode-1), sep=)

t$node.label = new_node_labels
}

labels = c(t$tip.label, t$node.label)
ordered_nodenames =