Author: lkajan-guest Date: 2012-10-11 20:50:46 +0000 (Thu, 11 Oct 2012) New Revision: 12392
Added: trunk/packages/conservation-code/trunk/debian/patches/stockholm_format Modified: trunk/packages/conservation-code/trunk/debian/changelog trunk/packages/conservation-code/trunk/debian/patches/series trunk/packages/conservation-code/trunk/debian/score_conservation.1.pod Log: allow parsing of Stockholm format as well Modified: trunk/packages/conservation-code/trunk/debian/changelog =================================================================== --- trunk/packages/conservation-code/trunk/debian/changelog 2012-10-11 19:59:08 UTC (rev 12391) +++ trunk/packages/conservation-code/trunk/debian/changelog 2012-10-11 20:50:46 UTC (rev 12392) @@ -1,3 +1,9 @@ +conservation-code (20110309.0-2) UNRELEASED; urgency=low + + * Allow parsing of Stockholm format as well. + + -- Laszlo Kajan <[email protected]> Thu, 11 Oct 2012 22:50:04 +0200 + conservation-code (20110309.0-1) unstable; urgency=low * Initial release. (Closes: #690058) Modified: trunk/packages/conservation-code/trunk/debian/patches/series =================================================================== --- trunk/packages/conservation-code/trunk/debian/patches/series 2012-10-11 19:59:08 UTC (rev 12391) +++ trunk/packages/conservation-code/trunk/debian/patches/series 2012-10-11 20:50:46 UTC (rev 12392) @@ -3,3 +3,4 @@ numpy.numarray default_matrix_path optimize_loop +stockholm_format Added: trunk/packages/conservation-code/trunk/debian/patches/stockholm_format =================================================================== --- trunk/packages/conservation-code/trunk/debian/patches/stockholm_format (rev 0) +++ trunk/packages/conservation-code/trunk/debian/patches/stockholm_format 2012-10-11 20:50:46 UTC (rev 12392) @@ -0,0 +1,53 @@ +Author: Laszlo Kajan <[email protected]> +Description: allow parsing of Stockholm format as well +Forwarded: no +--- a/score_conservation.py ++++ b/score_conservation.py +@@ -84,6 +84,7 @@ + ################################################################################ + + import math, sys, getopt ++import re + # numarray imported below + + PSEUDOCOUNT = .0000001 +@@ -679,12 +680,14 @@ + return names, alignment + + def read_clustal_alignment(filename): +- """ Read in the alignment stored in the CLUSTAL file, filename. Return ++ """ Read in the alignment stored in the CLUSTAL or Stockholm file, filename. Return + two lists: the names and sequences. """ + + names = [] + alignment = [] + ++ re_stock_markup = re.compile('^#=') ++ + f = open(filename) + + for line in f: +@@ -692,16 +695,20 @@ + if len(line) == 0: continue + if '*' in line: continue + +- if 'CLUSTAL' in line: continue ++ if line[0:7] == 'CLUSTAL': continue ++ if line[0:11] == '# STOCKHOLM': continue ++ if line[0:2] == '//': continue ++ ++ if re_stock_markup.match(line): continue + + t = line.split() + + if len(t) == 2 and t[1][0] in iupac_alphabet: + if t[0] not in names: + names.append(t[0]) +- alignment.append(t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X', '-').replace('\r', '')) ++ alignment.append(t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X', '-').replace('\r', '').replace('.', '-')) + else: +- alignment[names.index(t[0])] += t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X','-').replace('\r', '') ++ alignment[names.index(t[0])] += t[1].upper().replace('B', 'D').replace('Z', 'Q').replace('X','-').replace('\r', '').replace('.', '-') + + return names, alignment + Modified: trunk/packages/conservation-code/trunk/debian/score_conservation.1.pod =================================================================== --- trunk/packages/conservation-code/trunk/debian/score_conservation.1.pod 2012-10-11 19:59:08 UTC (rev 12391) +++ trunk/packages/conservation-code/trunk/debian/score_conservation.1.pod 2012-10-11 20:50:46 UTC (rev 12392) @@ -10,7 +10,7 @@ =head1 DESCRIPTION -Score protein sequence conservation in B<ALIGNFILE>. B<ALIGNFILE> must be in FASTA or CLUSTAL format. +Score protein sequence conservation in B<ALIGNFILE>. B<ALIGNFILE> must be in FASTA, CLUSTAL or Stockholm format. The following conservation scoring methods are implemented: * sum of pairs @@ -32,7 +32,7 @@ Jensen-Shannon divergence and a window B<-w> of I<3>. The sequence-specific output can be used as the conservation input for -concavity. +concavity(1). Conservation is highly predictive in identifying catalytic sites and residues near bound ligands. _______________________________________________ debian-med-commit mailing list [email protected] http://lists.alioth.debian.org/cgi-bin/mailman/listinfo/debian-med-commit
