Author: bugman
Date: Sat Jan 31 17:56:51 2015
New Revision: 27433
URL: http://svn.gna.org/viewcvs/relax?rev=27433&view=rev
Log:
Creation of the lib.sequence_alignment.msa.msa_general() function.
This consists of code from the structure.sequence_alignment user function
backend function
pipe_control.structure.main.sequence_alignment() for selecting between the
different sequence
alignment methods.
Modified:
trunk/lib/sequence_alignment/msa.py
trunk/pipe_control/structure/main.py
Modified: trunk/lib/sequence_alignment/msa.py
URL:
http://svn.gna.org/viewcvs/relax/trunk/lib/sequence_alignment/msa.py?rev=27433&r1=27432&r2=27433&view=diff
==============================================================================
--- trunk/lib/sequence_alignment/msa.py (original)
+++ trunk/lib/sequence_alignment/msa.py Sat Jan 31 17:56:51 2015
@@ -27,6 +27,7 @@
import sys
# relax module imports.
+from lib.errors import RelaxError
from lib.sequence_alignment.align_protein import align_pairwise
@@ -167,6 +168,68 @@
return strings, gaps
+def msa_general(sequences, residue_numbers=None, msa_algorithm='Central Star',
pairwise_algorithm='NW70', matrix='BLOSUM62', gap_open_penalty=1.0,
gap_extend_penalty=1.0, end_gap_open_penalty=0.0, end_gap_extend_penalty=0.0):
+ """General interface for multiple sequence alignments (MSA).
+
+ This can be used to select between the following MSA algorithms:
+
+ - 'Central Star', to use the central_star() function.
+ - 'residue number', to use the msa_residue_numbers() function.
+
+
+ @param sequences: The list of residue sequences as one
letter codes.
+ @type sequences: list of str
+ @keyword residue_numbers: The list of residue numbers for each
sequence.
+ @type residue_numbers: list of list of int
+ @keyword msa_algorithm: The multiple sequence alignment (MSA)
algorithm to use.
+ @type msa_algorithm: str
+ @keyword pairwise_algorithm: The pairwise sequence alignment
algorithm to use.
+ @type pairwise_algorithm: str
+ @keyword matrix: The substitution matrix to use.
+ @type matrix: str
+ @keyword gap_open_penalty: The penalty for introducing gaps, as a
positive number.
+ @type gap_open_penalty: float
+ @keyword gap_extend_penalty: The penalty for extending a gap, as a
positive number.
+ @type gap_extend_penalty: float
+ @keyword end_gap_open_penalty: The optional penalty for opening a gap
at the end of a sequence.
+ @type end_gap_open_penalty: float
+ @keyword end_gap_extend_penalty: The optional penalty for extending a
gap at the end of a sequence.
+ @type end_gap_extend_penalty: float
+ @return: The list of alignment strings and the
gap matrix.
+ @rtype: list of str, numpy rank-2 int array
+ """
+
+ # Check the MSA algorithm.
+ allowed_msa = ['Central Star', 'residue number']
+ if msa_algorithm not in allowed_msa:
+ raise RelaxError("The MSA algorithm must be one of %s." % allowed_msa)
+
+ # Check the penalty arguments.
+ if gap_open_penalty != None:
+ if gap_open_penalty < 0.0:
+ raise RelaxError("The gap opening penalty %s must be a positive
number." % gap_open_penalty)
+ if gap_extend_penalty != None:
+ if gap_extend_penalty < 0.0:
+ raise RelaxError("The gap extension penalty %s must be a positive
number." % gap_extend_penalty)
+ if end_gap_open_penalty != None:
+ if end_gap_open_penalty < 0.0:
+ raise RelaxError("The end gap opening penalty %s must be a
positive number." % end_gap_open_penalty)
+ if end_gap_extend_penalty != None:
+ if end_gap_extend_penalty < 0.0:
+ raise RelaxError("The end gap extension penalty %s must be a
positive number." % end_gap_extend_penalty)
+
+ # Use the central star multiple alignment algorithm.
+ if msa_algorithm == 'Central Star':
+ strings, gaps = central_star(sequences, algorithm=pairwise_algorithm,
matrix=matrix, gap_open_penalty=gap_open_penalty,
gap_extend_penalty=gap_extend_penalty,
end_gap_open_penalty=end_gap_open_penalty,
end_gap_extend_penalty=end_gap_extend_penalty)
+
+ # Alignment by residue number.
+ elif msa_algorithm == 'residue number':
+ strings, gaps = msa_residue_numbers(sequences,
residue_numbers=residue_numbers)
+
+ # Return the alignment strings and gap matrix.
+ return strings, gaps
+
+
def msa_residue_numbers(sequences, residue_numbers=None):
"""Align multiple sequences based on the residue numbering.
Modified: trunk/pipe_control/structure/main.py
URL:
http://svn.gna.org/viewcvs/relax/trunk/pipe_control/structure/main.py?rev=27433&r1=27432&r2=27433&view=diff
==============================================================================
--- trunk/pipe_control/structure/main.py (original)
+++ trunk/pipe_control/structure/main.py Sat Jan 31 17:56:51 2015
@@ -38,7 +38,7 @@
from lib.plotting.api import correlation_matrix
from lib.selection import tokenise
from lib.sequence import write_spin_data
-from lib.sequence_alignment.msa import central_star, msa_residue_numbers,
msa_residue_skipping
+from lib.sequence_alignment.msa import msa_general, msa_residue_numbers,
msa_residue_skipping
from lib.structure.internal.coordinates import assemble_atomic_coordinates,
assemble_coord_array, loop_coord_structures
from lib.structure.internal.displacements import Displacements
from lib.structure.internal.object import Internal
@@ -1235,30 +1235,22 @@
@type end_gap_extend_penalty: float
"""
- # Check the penalty arguments.
- if gap_open_penalty != None:
- if gap_open_penalty < 0.0:
- raise RelaxError("The gap opening penalty %s must be a positive
number." % gap_open_penalty)
- if gap_extend_penalty != None:
- if gap_extend_penalty < 0.0:
- raise RelaxError("The gap extension penalty %s must be a positive
number." % gap_extend_penalty)
- if end_gap_open_penalty != None:
- if end_gap_open_penalty < 0.0:
- raise RelaxError("The end gap opening penalty %s must be a
positive number." % end_gap_open_penalty)
- if end_gap_extend_penalty != None:
- if end_gap_extend_penalty < 0.0:
- raise RelaxError("The end gap extension penalty %s must be a
positive number." % end_gap_extend_penalty)
-
# Assemble the structural objects.
objects, object_names, pipes = assemble_structural_objects(pipes=pipes,
models=models, molecules=molecules)
# Assemble the atomic coordinates of all molecules.
ids, object_id_list, model_list, molecule_list, atom_pos, mol_names,
res_names, res_nums, atom_names, elements, one_letter_codes, num_mols =
assemble_atomic_coordinates(objects=objects, object_names=object_names,
molecules=molecules, models=models)
+ # Convert the residue number data structure.
+ res_num_list = []
+ for mol_index in range(num_mols):
+ res_num_list.append([])
+ for i in range(len(one_letter_codes[mol_index])):
+ key = res_nums[mol_index][i].keys()[0]
+ res_num_list[mol_index].append(res_nums[mol_index][i][key])
+
# MSA.
- if msa_algorithm == 'Central Star':
- # Use the central star multiple alignment algorithm.
- strings, gaps = central_star(one_letter_codes,
algorithm=pairwise_algorithm, matrix=matrix, gap_open_penalty=gap_open_penalty,
gap_extend_penalty=gap_extend_penalty,
end_gap_open_penalty=end_gap_open_penalty,
end_gap_extend_penalty=end_gap_extend_penalty)
+ strings, gaps = msa_general(one_letter_codes,
residue_numbers=res_num_list, msa_algorithm=msa_algorithm,
pairwise_algorithm=pairwise_algorithm, matrix=matrix,
gap_open_penalty=gap_open_penalty, gap_extend_penalty=gap_extend_penalty,
end_gap_open_penalty=end_gap_open_penalty,
end_gap_extend_penalty=end_gap_extend_penalty)
# Set up the data store object.
if not hasattr(ds, 'sequence_alignments'):
_______________________________________________
relax (http://www.nmr-relax.com)
This is the relax-commits mailing list
[email protected]
To unsubscribe from this list, get a password
reminder, or change your subscription options,
visit the list information page at
https://mail.gna.org/listinfo/relax-commits