Author: bugman
Date: Wed Jan 21 15:40:56 2015
New Revision: 27258
URL: http://svn.gna.org/viewcvs/relax?rev=27258&view=rev
Log:
Created the lib.sequence_alignment.align_protein module for the sequence
alignment of proteins.
This general module currently implements the align_pairwise() function for the
pairwise alignment of
protein sequences. It provides the infrastructure for specifying gap starting
and extension
penalties, choosing the alignment algorithm (currently only the
Needleman-Wunsch sequence alignment
algorithm as 'NW70'), and choosing the substitution matrix (currently only
BLOSUM62). The function
provides lots of printouts for user feedback.
Added:
trunk/lib/sequence_alignment/align_protein.py
Modified:
trunk/lib/sequence_alignment/__init__.py
Modified: trunk/lib/sequence_alignment/__init__.py
URL:
http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/__init__.py?rev=27258&r1=27257&r2=27258&view=diff
==============================================================================
--- trunk/lib/sequence_alignment/__init__.py (original)
+++ trunk/lib/sequence_alignment/__init__.py Wed Jan 21 15:40:56 2015
@@ -23,6 +23,7 @@
"""The relax-lib sequence alignment package - a library of functions for
aligning proteins, DNA, RNA or other molecules."""
__all__ = [
+ 'align_protein',
'needleman_wunsch',
'substitution_matrices'
]
Added: trunk/lib/sequence_alignment/align_protein.py
URL:
http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/align_protein.py?rev=27258&view=auto
==============================================================================
--- trunk/lib/sequence_alignment/align_protein.py (added)
+++ trunk/lib/sequence_alignment/align_protein.py Wed Jan 21 15:40:56 2015
@@ -0,0 +1,98 @@
+###############################################################################
+# #
+# Copyright (C) 2015 Edward d'Auvergne #
+# #
+# This file is part of the program relax (http://www.nmr-relax.com). #
+# #
+# This program is free software: you can redistribute it and/or modify #
+# it under the terms of the GNU General Public License as published by #
+# the Free Software Foundation, either version 3 of the License, or #
+# (at your option) any later version. #
+# #
+# This program is distributed in the hope that it will be useful, #
+# but WITHOUT ANY WARRANTY; without even the implied warranty of #
+# MERCHANTABILITY or FITNESS FOR A PARTICULAR PURPOSE. See the #
+# GNU General Public License for more details. #
+# #
+# You should have received a copy of the GNU General Public License #
+# along with this program. If not, see <http://www.gnu.org/licenses/>. #
+# #
+###############################################################################
+
+# Module docstring.
+"""General sequence alignment functions."""
+
+# Python module imports.
+from string import upper
+import sys
+from warnings import warn
+
+# relax module imports.
+from lib.errors import RelaxError
+from lib.sequence_alignment.needleman_wunsch import needleman_wunsch_align
+from lib.sequence_alignment.substitution_matrices import BLOSUM62, BLOSUM62_SEQ
+from lib.warnings import RelaxWarning
+
+
+def align_pairwise(sequence1, sequence2, algorithm='NW70', matrix='BLOSUM62',
gap_penalty=1.0, extend_penalty=1.0):
+ """Align two protein sequences.
+
+ @param sequence1: The first protein sequence as one letter codes.
+ @type sequence1: str
+ @param sequence2: The second protein sequence as one letter
codes.
+ @type sequence2: str
+ @keyword algorithm: The alignment algorithm to use.
+ @type algorithm: str
+ @keyword matrix: The substitution matrix to use.
+ @type matrix: str
+ @keyword gap_penalty: The penalty for introducing gaps, as a
positive number.
+ @type gap_penalty: float
+ @keyword extend_penalty: The penalty for extending a gap, as a positive
number.
+ @type extend_penalty: float
+ """
+
+ # Checks.
+ allowed_algor = ['NW70']
+ if algorithm not in allowed_algor:
+ raise RelaxError("The sequence alignment algorithm '%s' is unknown, it
must be one of %s." % (algorithm, allowed_algor))
+ allowed_matrices = ['BLOSUM62']
+ if matrix not in allowed_matrices:
+ raise RelaxError("The substitution matrix '%s' is unknown, it must be
one of %s." % (matrix, allowed_matrices))
+
+ # Capitalise the sequences.
+ sequence1 = upper(sequence1)
+ sequence2 = upper(sequence2)
+
+ # Turn off the extension penalty for algorithms not supporting it.
+ if extend_penalty != None and algorithm in ['NW70']:
+ warn(RelaxWarning("The gap extension penalty is not supported by the
Needleman-Wunsch sequence alignment algorithm."))
+ extend_penalty = None
+
+ # Initial printout.
+ sys.stdout.write("\nPairwise protein alignment.\n")
+ sys.stdout.write("%-30s %s\n" % ("Substitution matrix:", matrix))
+ sys.stdout.write("%-30s %s\n" % ("Gap penalty:", gap_penalty))
+ sys.stdout.write("%-30s %s\n" % ("Extend penalty:", extend_penalty))
+ sys.stdout.write("\n%-30s %s\n" % ("Input sequence 1:", sequence1))
+ sys.stdout.write("%-30s %s\n" % ("Input sequence 2:", sequence2))
+
+ # Select the substitution matrix.
+ if matrix == 'BLOSUM62':
+ sub_matrix = BLOSUM62
+ sub_seq = BLOSUM62_SEQ
+
+ # The alignment.
+ if algorithm == 'NW70':
+ align1, align2, gaps = needleman_wunsch_align(sequence1, sequence2,
sub_matrix=sub_matrix, sub_seq=sub_seq, gap_penalty=gap_penalty)
+
+ # Final printout.
+ sys.stdout.write("\n%-30s %s\n" % ("Aligned sequence 1:", align1))
+ sys.stdout.write("%-30s %s\n" % ("Aligned sequence 2:", align2))
+ sys.stdout.write("%-30s " % "")
+ for i in range(len(align1)):
+ if align1[i] == align2[i]:
+ sys.stdout.write("*")
+ else:
+ sys.stdout.write(" ")
+ sys.stdout.write("\n\n")
+
_______________________________________________
relax (http://www.nmr-relax.com)
This is the relax-commits mailing list
[email protected]
To unsubscribe from this list, get a password
reminder, or change your subscription options,
visit the list information page at
https://mail.gna.org/listinfo/relax-commits