Andreas Tille pushed to branch upstream at Debian Med / python-treetime
Commits: de590170 by Andreas Tille at 2022-02-19T08:37:39+01:00 New upstream version 0.8.6 - - - - - 3 changed files: - changelog.md - treetime/__init__.py - treetime/treeanc.py Changes: ===================================== changelog.md ===================================== @@ -1,3 +1,7 @@ +# 0.8.6 + * optionally allow incomplete alignment [PR #178](https://github.com/neherlab/treetime/pull/178) + * reduce memory footprint through better clean up and optimizing types. [PR #179](https://github.com/neherlab/treetime/pull/179) + # 0.8.5 * bug fixes related to edge cases were sequences consist only of missing data * bug fix when the CLI command `treetime` is run without alignment ===================================== treetime/__init__.py ===================================== @@ -1,5 +1,5 @@ from __future__ import print_function, division, absolute_import -version="0.8.5" +version="0.8.6" class TreeTimeError(Exception): """TreeTimeError class""" ===================================== treetime/treeanc.py ===================================== @@ -56,7 +56,7 @@ class TreeAnc(object): def __init__(self, tree=None, aln=None, gtr=None, fill_overhangs=True, ref=None, verbose = ttconf.VERBOSE, ignore_gaps=True, convert_upper=True, seq_multiplicity=None, log=None, - compress=True, seq_len=None, + compress=True, seq_len=None, ignore_missing_alns=False, **kwargs): """ TreeAnc constructor. It prepares the tree, attaches sequences to the leaf nodes, @@ -78,22 +78,22 @@ class TreeAnc(object): GTR model object. If string passed, it is interpreted as the type of the GTR model. A new GTR instance will be created for this type. - fill_overhangs : bool + fill_overhangs : bool, default True In some cases, the missing data on both ends of the alignment is filled with the gap sign('-'). If set to True, the end-gaps are converted to "unknown" characters ('N' for nucleotides, 'X' for aminoacids). 
Otherwise, the alignment is treated as-is ref : None, optional - Reference sequence used in VCF mode + Reference sequence used in VCF mode - verbose : int + verbose : int, default 3 Verbosity level as number from 0 (lowest) to 10 (highest). - ignore_gaps : bool + ignore_gaps : bool, default True Ignore gaps in branch length calculations - convert_upper : bool, optional - Description + convert_upper : bool, default True + Convert all sequences to upper case seq_multiplicity : dict If individual nodes in the tree correspond to multiple sampled sequences @@ -101,7 +101,7 @@ class TreeAnc(object): specified as a dictionary. This currently only affects rooting and can be used to weigh individual tips by abundance or important during root search. - compress : bool, optional + compress : bool, default True reduce identical alignment columns to one (not useful when inferring site specific GTR models). @@ -109,6 +109,8 @@ class TreeAnc(object): length of the sequence. this is inferred from the input alignment or the reference sequence in most cases but can be specified for other applications. 
+ ignore_missing_alns : bool, default False + **kwargs Keyword arguments to construct the GTR model @@ -139,6 +141,7 @@ class TreeAnc(object): self.ignore_gaps = ignore_gaps self.reconstructed_tip_sequences = False self.sequence_reconstruction = None + self.ignore_missing_alns = ignore_missing_alns self._tree = None self.tree = tree @@ -335,7 +338,7 @@ class TreeAnc(object): Returns ------- float - inverse of the uncompressed sequene length - length scale for short branches + inverse of the uncompressed sequence length - length scale for short branches """ return 1.0/self.data.full_length if self.data.full_length else np.nan @@ -376,7 +379,7 @@ class TreeAnc(object): if l.name not in self.data.compressed_alignment and l.is_terminal(): self.logger("***WARNING: TreeAnc._attach_sequences_to_nodes: NO SEQUENCE FOR LEAF: %s" % l.name, 0, warn=True) failed_leaves += 1 - if failed_leaves > self.tree.count_terminals()/3: + if not self.ignore_missing_alns and failed_leaves > self.tree.count_terminals()/3: raise MissingDataError("TreeAnc._check_alignment_tree_gtr_consistency: At least 30\\% terminal nodes cannot be assigned a sequence!\n" "Are you sure the alignment belongs to the tree?") else: # could not assign sequence for internal node - is OK @@ -906,12 +909,19 @@ class TreeAnc(object): # this is prod_ch L_x(i) msg_from_children = np.sum(np.stack([c.joint_Lx for c in node.clades], axis=0), axis=0) + if not debug: + # Now that we have calculated the current node's likelihood + # from its children, clean up likelihood matrices attached + # to children to save memory. 
+ for c in node.clades: + del c.joint_Lx + # for every possible state of the parent node, # get the best state of the current node # and compute the likelihood of this state # preallocate storage - node.joint_Lx = np.zeros((L, n_states)) # likelihood array - node.joint_Cx = np.zeros((L, n_states), dtype=int) # max LH indices + node.joint_Lx = np.zeros((L, n_states)) # likelihood array + node.joint_Cx = np.zeros((L, n_states), dtype=np.uint16) # max LH indices for char_i, char in enumerate(self.gtr.alphabet): # Pij(i) * L_ch(i) for given parent state j msg_to_parent = (log_transitions[:,char_i].T + msg_from_children) @@ -973,7 +983,10 @@ class TreeAnc(object): # do clean-up if not debug: for node in self.tree.find_clades(order='preorder'): - del node.joint_Lx + # Check for the likelihood matrix, since we might have cleaned + # it up earlier. + if hasattr(node, "joint_Lx"): + del node.joint_Lx del node.joint_Cx if hasattr(node, 'seq_idx'): del node.seq_idx View it on GitLab: https://salsa.debian.org/med-team/python-treetime/-/commit/de590170282fb0ab347d696cbbd5557bdae500f7 -- You're receiving this email because of your account on salsa.debian.org.
_______________________________________________ debian-med-commit mailing list [email protected] https://alioth-lists.debian.net/cgi-bin/mailman/listinfo/debian-med-commit
