Hello,

I just implemented the Needleman-Wunsch-Algorithm and an object for handling
substitution matrices like BLOSSUM, PAM, GONNET and so on. It parses a
matrix file and provides a method to get the costs for changing Symbol A to
Symbol B. This is realized by two hashes and a private int[][] matrix.
If NeedlemanWunsch gets equal weights for gap opening and gap extension it
won't consider affine gap penalties, whereas otherwise it will, which needs
three times more memory. It would also be good to have a kind of interface to create multiple
different implementations of pairwise sequence alignment algorithms which
could be used easily then. I also attached an Interface for that purpose.
The NeedlemanWunsch already implements. The HMM algorithm from the cookbook
also implements this interface with only little effort of changes.

Andreas Dräger
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */


import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

import org.biojava.bio.BioException;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.db.SequenceDB;
import org.biojava.bio.seq.impl.SimpleGappedSequence;
import org.biojava.bio.seq.impl.SimpleSequence;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.Alignment;
import org.biojava.bio.symbol.AlphabetManager;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.SimpleAlignment;
import org.biojava.bio.symbol.SimpleSymbolList;

/*
 * Created on 23.06.2005
 */

/**
  * @author Andreas Dräger
  */

public class NeedlemanWunsch
{

  protected double[][] CostMatrix;
  protected FiniteAlphabet alpha;
  protected SubstitutionMatrix subMatrix;
  private Alignment pairalign;
  private double insert;
  private double delete;
  private double gapExt;
  private String alignment;
  private double match;
  private double replace;

  
  /** Just for some tests.
    * @param args
    */
  public static void main (String args[])
  {
  	if (args.length < 3)
        throw new Error("Usage: NeedlemanWunsch sourceSeqFile targetSeqFile substitutionMatrixFile");
    try {
      FiniteAlphabet alphabet = (FiniteAlphabet) AlphabetManager.alphabetForName("DNA");
      SubstitutionMatrix matrix = new SubstitutionMatrix(alphabet, new File(args[2]));
      SequenceAlignment aligner = new NeedlemanWunsch( 
			alphabet, 
			2,  	// insert
			2,	// delete
			1, 	// gapExtend
			0, 	// match
			3,	// replace
			matrix 	// SubstitutionMatrix
		);
      aligner.alignAll(
        // sources
	SeqIOTools.readFastaDNA(new BufferedReader(new FileReader(args[0]))),
	// sequenceDB
	SeqIOTools.readFasta(new FileInputStream(new File(args[1])), DNATools.getDNA()));
    } catch (NoSuchElementException exc) {
      exc.printStackTrace();
    } catch (FileNotFoundException exc) {
      exc.printStackTrace();
    } catch (BioException exc) {
      exc.printStackTrace();
    } catch (IOException exc) {
      exc.printStackTrace();
    } catch (Exception exc) {
      exc.printStackTrace();
    }
  }

  /** Constructs a new Object with the given parameters based on the Needleman-Wunsch algorithm
    * @param alpha
    * @param insert
    * @param delete
    * @param gapExtend
    * @param subMat
    */
  public NeedlemanWunsch(FiniteAlphabet alpha, double insert, double delete, double gapExtend, double match, double replace, SubstitutionMatrix subMat)
  {
    this.alpha = alpha;
    this.subMatrix = subMat;
    this.alpha = subMatrix.getAlphabet();
    this.insert = insert;
    this.delete = delete;
    this.gapExt = gapExtend;  
    this.match  = match;
    this.replace = replace;
    this.alignment = "";
  }

  
  /** Prints a String representation of the CostMatrix for the given Alignment on the screen.
    * @param queryChar
    * @param targetChar
    * @return
    */
  public static String printCostMatrix (double[][] CostMatrix, char[] queryChar, char[] targetChar)
  {
    int line, col;
    String output = "\t";
   
    for (col=0; col<=targetChar.length; col++)
      if (col==0) output += "["+col+"]\t";
      else output += "["+targetChar[col-1]+"]\t";
    for (line=0; line<=queryChar.length; line++)
    {
      if (line==0) output += "\n["+line+"]\t";
      else output += "\n["+queryChar[line-1]+"]\t";
      for (col=0; col<=targetChar.length; col++)
        output += CostMatrix[line][col]+"\t";
    }
    output += "\ndelta[Edit] = "+CostMatrix[line-1][col-1]+"\n";
    return output;
  }

 
  /** Computes the optimal alignment of the two sequences and constructs a String
    * representing the alignment as well as a new Alignment object, which can be
    * used in different ways by BioJava.
    *  
    * @param query
    * @param target
    * @return alignment string representation
    */
  private String optimalAlignment(Sequence query, Sequence target) throws BioException
  {
    /*StringBuffer[] align = new StringBuffer[] {new StringBuffer(CostMatrix.length), new StringBuffer(CostMatrix[0].length)};
    StringBuffer path = new StringBuffer("");//*/
    
    String[] align = new String[] {"", ""};
    String path = "";//*/ 
    
    String output = "";
    int j = this.CostMatrix[CostMatrix.length - 1].length -1;
    SymbolTokenization st = alpha.getTokenization("default");

    for (int i = this.CostMatrix.length - 1; i>0; )
    {
      do { 
        // From now only Insert possible.
        if (i == 0) {
          align[0] = '~'     + align[0]; // st.tokenizeSymbol(alpha.getGapSymbol()) + align[0]; //
          align[1] = st.tokenizeSymbol(target.symbolAt(j--)) + align[1];
          path = ' ' + path;//*/
          
        // From now only Delete possible.
        } else if (j == 0) { 
          align[0] = st.tokenizeSymbol(query.symbolAt(i--))  + align[0];
          align[1] = '~'     + align[1]; // st.tokenizeSymbol(alpha.getGapSymbol()) + align[1]; //
          path = ' ' + path;//*/
          
        // Match/Replace
        } else if (CostMatrix[i-1][j-1] == min(CostMatrix[i][j-1], CostMatrix[i-1][j-1], CostMatrix[i-1][j]))  { 
          align[0] = st.tokenizeSymbol(query.symbolAt(i--)) + align[0];
          align[1] = st.tokenizeSymbol(target.symbolAt(j--)) + align[1];//*/
          if (query.symbolAt(i+1) == target.symbolAt(j+1))
            path = '|' + path;//*/ 
          else path = ' ' + path;//*/
          
        // Insert
        } else if ((CostMatrix[i][j-1] < CostMatrix[i-1][j-1]) && (CostMatrix[i][j-1] < CostMatrix[i-1][j])) {
          align[0] = '-'     + align[0]; // st.tokenizeSymbol(alpha.getGapSymbol()) + align[0]; //
          align[1] = st.tokenizeSymbol(target.symbolAt(j--)) + align[1];
          path = ' ' + path;//*/
  
        // Delete
        } else { 
          align[0] = st.tokenizeSymbol(query.symbolAt(i--))  + align[0];
          align[1] = '-'     + align[1]; // st.tokenizeSymbol(alpha.getGapSymbol()) + align[1]; //
          path = ' ' + path;//*/
        }
      } while (j>0);
    }

    // construct the biojava Alignment object:    
    query = new SimpleGappedSequence(
      new SimpleSequence(
        new SimpleSymbolList(query.getAlphabet().getTokenization("token"), align[0]), 
        query.getURN(), 
        query.getName(), 
        query.getAnnotation()));
    target = new SimpleGappedSequence(
      new SimpleSequence(
        new SimpleSymbolList(target.getAlphabet().getTokenization("token"), align[1]),
        target.getURN(),
        target.getName(),
        target.getAnnotation()));
    Map m = new HashMap();
    m.put(query.getName(), query);
    m.put(target.getName(), target);
    pairalign = new SimpleAlignment(m);
        
    output += "Length:\t"+align[0].length()+"\n";
    output += "Score:\t\t"+(-1)*getEditDistance()+"\n\n";
        
    int currline = Math.min(60, align[0].length()); 
    // counts the absoulute position in the String.

    output += "\nQuery:\t"  + align[0].substring(0, currline); 
    output += "\n\t"        + path.substring(0, currline);
    output += "\nTarget:\t" + align[1].substring(0, currline)+"\n";
    
    for (; currline+60 < align[0].length(); currline+=60) {
      output += "\nQuery:\t"  + align[0].substring(currline, currline+60);
      output += "\n\t"        + path.substring(currline, currline+60);
      output += "\nTarget:\t" + align[1].substring(currline, currline+60)+"\n";
    }
    if (currline+1 < align[0].length()) {
      output += "\nQuery:\t"  + align[0].substring(currline, align[0].length());
      output += "\n\t"        + path.substring(currline, path.length());
      output += "\nTarget:\t" + align[1].substring(currline, align[1].length())+"\n";
    }

    output += "\n\n";
 
    return output;    
  }
  
  /** prints the alignment String on the screen.
    * @param align
    */
  public static void printAlignment(String align)
  { 
    System.out.print(align);
  }
  
  
  /**
    * @return Alignment object containing the two gapped sequences constructed from query and target.
    * @throws Exception
    */
  public Alignment getAlignment(Sequence query, Sequence target) throws Exception
  {
    pairwiseAlignment(query, target);
    return pairalign;  
  }
  
  /** 
    * @return returns the edit_distance computed with the given parameters.
    */
  public double getEditDistance()
  {
    return CostMatrix[CostMatrix.length-1][CostMatrix[CostMatrix.length-1].length - 1];
  }

  
  /** 
    * @param x
    * @param y
    * @param z
    * @return Gives the minimum of three doubles
    */
  protected static double min (double x, double y, double z)
  {
    if ((x < y) && (x < z)) return x;
    if (y < z) return y;
    return z;
  }


  /* (non-Javadoc)
   * @see toolbox.align.SequenceAlignment#getAlignment()
   */
  public String getAlignmentString() throws BioException
  {
    return alignment;
  }
  
  
  /* (non-Javadoc)
   * @see toolbox.align.SequenceAlignment#alignAll(org.biojava.bio.seq.SequenceIterator, org.biojava.bio.seq.db.SequenceDB)
   */
  public List alignAll(SequenceIterator source, SequenceDB subjectDB) throws NoSuchElementException, BioException
  {
    List l = new LinkedList();
    while (source.hasNext()) {
  	  Sequence query = source.nextSequence();
  	  // compare all the sequences of both sets.
  	  SequenceIterator target = subjectDB.sequenceIterator();
      while (target.hasNext()) 
        try {
          l.add(getAlignment(query, target.nextSequence()));
          //pairwiseAlignment(query, target.nextSequence());
        } catch (Exception exc) {
          exc.printStackTrace();
        }
    }
    return l;
  }


  /** Global pairwise sequence alginment of two BioJava-Sequence objects according to the
    * Needleman-Wunsch-algorithm. 
    *
    * @see toolbox.align.SequenceAlignment#pairAlign(org.biojava.bio.seq.Sequence, org.biojava.bio.seq.Sequence)
    */
  public double pairwiseAlignment(Sequence query, Sequence subject)
  {
    long time1 = System.currentTimeMillis();
    
	int   i, j;
    double matchReplace;
    this.CostMatrix   = new double[query.length()+1][subject.length()+1]; // Matrix CostMatrix

    // construct the matrix:
    CostMatrix[0][0] = 0;
    
    /* If we want to have affine gap penalties, we have to initialise additional matrices:
     * If this is not necessary, we won't do that (because it's expensive).
     */
    if ((gapExt != delete) || (gapExt != insert)) {
      
      double[][] E = new double[query.length()+1][subject.length()+1];	// Inserts
      double[][] F = new double[query.length()+1][subject.length()+1];	// Deletes
      double[][] G = new double[query.length()+1][subject.length()+1];	// Match/Replace
      
      G[0][0] = 0;
      E[0][0] = F[0][0]   = Double.MAX_VALUE;
      for (i=1; i<=query.length();   i++) {
        CostMatrix[i][0]  = CostMatrix[i-1][0] + delete;
        G[i][0] = E[i][0] = Double.MAX_VALUE;
        F[i][0] = delete;
      }
      for (j=1; j<=subject.length(); j++) {
        CostMatrix[0][j]  = CostMatrix[0][j-1] + insert;
        G[0][j] = F[0][j] = Double.MAX_VALUE;
        E[0][j] = insert;
      }
      for (i=1; i<=query.length();   i++)
        for (j=1; j<=subject.length(); j++)
        {
          try {
            matchReplace = subMatrix.getValueAt(query.symbolAt(i), subject.symbolAt(j));
          } catch (IndexOutOfBoundsException exc) {
            exc.printStackTrace();
          } catch (BioException exc) {
            System.err.println(exc.getMessage());
          } finally {
            if (query.symbolAt(i).getMatches().contains(subject.symbolAt(j)) ||
                subject.symbolAt(j).getMatches().contains(query.symbolAt(i)))
              matchReplace = match; //0;
            else matchReplace = replace; //delete/2 + insert/2;
          }
          G[i][j] = CostMatrix[i-1][j-1] - matchReplace;
          E[i][j] = Math.min(E[i][j-1], CostMatrix[i][j-1] + insert) + gapExt;
          F[i][j] = Math.min(F[i-1][j], CostMatrix[i-1][j] + delete) + gapExt;
          CostMatrix[i][j] = min (E[i][j], F[i][j], G[i][j]);
        }
      
    } else {
      /*
       * No affine gap penalties, constant gap penalties, which is much faster and needs less memory.
       */
      for (i=1; i<=query.length();   i++) CostMatrix[i][0] = CostMatrix[i-1][0] + delete;
      for (j=1; j<=subject.length(); j++) CostMatrix[0][j] = CostMatrix[0][j-1] + insert;
      for (i=1; i<=query.length();   i++)
        for (j=1; j<=subject.length(); j++)
        {        
          try {
            matchReplace = subMatrix.getValueAt(query.symbolAt(i), subject.symbolAt(j));
          } catch (IndexOutOfBoundsException exc) {
            exc.printStackTrace();
          } catch (BioException exc) {
            System.err.println(exc.getMessage());
          } finally {
            if (query.symbolAt(i).getMatches().contains(subject.symbolAt(j)) ||
                subject.symbolAt(j).getMatches().contains(query.symbolAt(i)))
              matchReplace = match; //0;
            else matchReplace = replace; //delete/2 + insert/2;
          }
          CostMatrix[i][j] = min (
            CostMatrix[i-1][j]   + delete, 
            CostMatrix[i][j-1]   + insert, 
            CostMatrix[i-1][j-1] - matchReplace);
        }
    } 
    // Everything is the same from here:
    try {
      //this.printCostMatrix(query.seqString().toCharArray(), target.seqString().toCharArray());
      // generate "String" with optimal Alignment.
      this.alignment += this.optimalAlignment(query, subject) + "\n";
      // print the Alignment on the screen
      this.printAlignment(alignment);		
    } catch (BioException exc) {
      exc.printStackTrace();
    }    
    
    time1 = System.currentTimeMillis() - time1;
    System.out.println("Zeit:\t"+time1);
    
    return getEditDistance();
  }


}
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

/*
 * Created on 01.08.2005
 *
 */
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;

import org.biojava.bio.BioException;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.AlphabetManager;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Symbol;

/** This object is able to read a substitution matrix file and constructs an int matrix to
  * store the matrix. Every single element of the matrix can be accessed by the method
  * <code>getValueAt</code> with the parameters being tow biojava symbols. This it why it is
  * not necessary to access the matrix directly. If there is no value for the two specified
  * <code>Symbol</code>s an <code>Exception</code> is thrown.
  *
  * @author Andreas Dr&auml;ger
  *
  */
public class SubstitutionMatrix
{
  protected Map rowSymbols, colSymbols;
  protected int[][] matrix;
  protected int min, max;
  protected FiniteAlphabet alphabet;
  protected String description, name;
  
  /** This constructs a <code>SubstitutionMatrix</code>-object that contains two 
    * <code>Map</code> data structures having biojava symbols as key and the value
    * being the index of the matrix containing the substitution score.
    * 
    * @param alpha the alphabet of the matrix (DNA, RNA or PROTEIN, or PROTEIN-TERM)
    * @param matrixFile the file containing the substitution matrix. Lines starting with
    *   '#' are comments. The line starting with a white space, is the table head. 
    *   Every line has to start with the one letter representation of the Symbol and then 
    *   the values for the exchange.
    * 
    * @throws IOException
    * @throws BioException
    */
  public SubstitutionMatrix(FiniteAlphabet alpha, File matrixFile) throws IOException, BioException
  {
    this.alphabet     = alpha;
    this.description  = "";
    this.name         = matrixFile.getName();
    this.rowSymbols   = new HashMap();
    this.colSymbols   = new HashMap();
    String matString  = "";
    BufferedReader br = new BufferedReader(new FileReader(matrixFile));
    while(br.ready()) 
      matString      += br.readLine() + "\n";
    this.matrix       = this.parseMatrix(matString);
    //this.printMatrix();
  }
  
  
  /** With this constructor it is possible to construct a SubstitutionMatrix object from a
    * substitution matrix file. The given String contains a number of lines separated by 
    * <code>\n</code>. Everything else is the same than for the constructor above.
    * 
    * @param alpha Das FiniteAlphabet, das benutzt werden soll.
    * @param matrixString
    * @param name der Matrix.
    * @throws BioException
    */
  public SubstitutionMatrix(FiniteAlphabet alpha, String matrixString, String name) throws BioException
  {
    this.alphabet    = alpha;
    this.description = "";
    this.name        = name;
    this.rowSymbols  = new HashMap();
    this.colSymbols  = new HashMap();
    this.matrix = this.parseMatrix(matrixString);   
    //this.printMatrix();
  }
  
  
  /** Constructs a SubstitutionMatrix with every Match and every Replace having the same
    * expenses given by the parameters. Ambigious symbols are not considered because there
    * might be to many of them (for proteins).
    *  
    * @param alpha
    * @param match
    * @param replace
    */
  public SubstitutionMatrix(FiniteAlphabet alpha, int match, int replace)
  {
    int i=0, j=0;
    
    this.alphabet = alpha;
    this.description = "Identity matrix. All replaces and all matches are treated equally.";
    this.name = "IDENTITY_"+match+"_"+replace;
    this.rowSymbols = new HashMap();
    this.colSymbols = new HashMap();
    this.matrix = new int[alpha.size()][alpha.size()];
    
    Symbol[] sym = new Symbol[alpha.size()];
    Iterator iter = alpha.iterator();

    for (i=0; iter.hasNext(); i++) {
      sym[i] = (Symbol) iter.next();
      rowSymbols.put(sym[i], Integer.valueOf(i));
      colSymbols.put(sym[i], Integer.valueOf(i));
    }   
    
    for(i=0; i<alphabet.size(); i++)
      for(j=0; j<alphabet.size(); j++) 
        if (sym[i].getMatches().contains(sym[j])) matrix[i][j] = match;
        else matrix[i][j] = replace;
    
    //this.printMatrix();
  }
  
  
  /** Reads a String representing the contents of a substitution matrix file.
    *  
    * @param matString
    * @return matrix
    * @throws BioException
    */
  protected int[][] parseMatrix(String matString) throws BioException
  {
    int i = 0, j = 0, rows = 0, cols = 0;
    StringTokenizer br, st;
    SymbolTokenization symtok = alphabet.getTokenization("token");

    this.min = Integer.MAX_VALUE;
    this.max = Integer.MIN_VALUE;
    /* First: count how many elements are in the matrix
     * fill lines and rows
     */
    br = new StringTokenizer(matString, "\n");
    while (br.hasMoreElements()) {
      String line = br.nextElement().toString();
      if (line.startsWith("#")) {
        description += line.substring(1);
        continue;
      } else if (line.startsWith(" ")) {
        st = new StringTokenizer(line, " ");
        for (j=0; st.hasMoreElements(); j++) { 
          colSymbols.put(symtok.parseToken(st.nextElement().toString()), Integer.valueOf(j));
        }
        cols = j;
      } else if (!line.startsWith("\n")) { // the matrix.
        st = new StringTokenizer(line, " ");
        if (st.hasMoreElements()) 
          rowSymbols.put(symtok.parseToken(st.nextElement().toString()), Integer.valueOf(rows++));
      }
    }
    
    int[][] matrix = new int[rows][cols];
    
    rows = 0;
    br = new StringTokenizer(matString, "\n");
    /* Second reading. Fill the matrix.
     */
    while (br.hasMoreElements())
    {
      String line = br.nextElement().toString();
      if      (line.startsWith("#")) continue;
      else if (line.startsWith(" ")) continue;
      else if (!line.startsWith("\n")) { // lines:
        st = new StringTokenizer(line, " ");
        if (st.hasMoreElements()) st.nextElement(); // throw away Symbol at beginning.
        for (j=0; st.hasMoreElements(); j++) {// cols:
          matrix[rows][j] = Integer.parseInt(st.nextElement().toString());
          if (matrix[rows][j] > max) max = matrix[rows][j]; // Maximum.
          if (matrix[rows][j] < min) min = matrix[rows][j]; // Minimum.
        }
        rows++;
      }
    }
    
    return matrix;
  }
  
  
  /** There are some substitution matrices containing more columns than lines. This
    * has to do with the ambigious symbols. Lines are always good, columns might not
    * contain the whole information. The matrix is supposed to be symmetric anyway, so
    * you can always set the ambigious symbol to be the first argument.
    * @param row Symbol of the line
    * @param col Symbol of the column
    * @return expenses for the exchange of symbol row and symbol col.
    * @throws BioException
    */
  public int getValueAt(Symbol row, Symbol col) throws BioException
  {
    if ((!rowSymbols.containsKey(row)) || (!colSymbols.containsKey(col)))
      throw new BioException("No entry for the sybols "+row.getName()+" and "+col.getName());
    return matrix[((Integer) rowSymbols.get(row)).intValue()][((Integer) colSymbols.get(col)).intValue()];
  }
  
  
  /**
    * @return the comment of the matrix
    */
  public String getDescription()
  {
    return description;
  }
  
  
  /**
    * @return the name of the matrix.
    */
  public String getName()
  {
    return name;
  }
  
  
  /**
    * @return minimum of the matrix.
    */
  public int getMin()
  {
    return min;
  }
  
  
  /**
    * 
    * @return maximum of the matrix.
    */
  public int getMax()
  {
    return max;
  }
  
  
  /** Sets the description to the given value.
    * 
    * @param desc a description. This doesn't have to start with '#'.
    */
  public void setDescription(String desc)
  {
    this.description = desc;
  }
  
  
  /** 
    * @return the alphabet of this matrix.
    */
  public FiniteAlphabet getAlphabet()
  {
    return alphabet;
  }
  
  
  /**
    * @return a string representation of this matrix without the description.
    */
  public String stringnifyMatrix()
  {
    int i = 0;
    String matrixString = "";
    Symbol[] colSyms    = new Symbol[this.colSymbols.keySet().size()];
    
    try {
      SymbolTokenization symtok = alphabet.getTokenization("default");  
      matrixString += "  ";
      Iterator colKeys = colSymbols.keySet().iterator();
      while (colKeys.hasNext()) {
        colSyms[i]    = (Symbol) colKeys.next(); 
        matrixString += symtok.tokenizeSymbol(colSyms[i++]).toUpperCase() + " ";
      }
      matrixString += "\n";
    
      Iterator rowKeys = rowSymbols.keySet().iterator();
      while (rowKeys.hasNext()) {
        Symbol rowSym = (Symbol) rowKeys.next();
        matrixString += symtok.tokenizeSymbol(rowSym).toUpperCase() + " ";
        for (i=0; i<colSyms.length; i++) {
          matrixString += getValueAt(rowSym, colSyms[i]) + " ";
        }
        matrixString += "\n";
      }  
    } catch (BioException exc) {
      exc.printStackTrace();
    }
    
    return matrixString;  
  }
  
  
  /**   
    * @return Gives a description with approximately 60 letters on every line 
    *   separated by <code>\n</code>. Every line starts with <code>#</code>.
    */
  public String stringnifyDescription()
  {
    String desc = "", line = "# ";
    StringTokenizer st  = new StringTokenizer(description, " ");
    while(st.hasMoreElements()) {
      line += st.nextElement().toString()+" ";
      if (line.length() >= 60) {
        desc += line + "\n";
        if (st.hasMoreElements()) line = "# ";
      } else if (!st.hasMoreElements())
        desc += line + "\n";
    }    
    return desc;
  }
  
  
  /** Overides the inherited method.
    * @return Gives a string representation of the SubstitutionMatrix. 
    *   This is a valid input for the constructor which needs a matrix string. This String
    *   also contains the description of the matrix if there is one.
    */
  public String toString()
  {
    String desc = "", line = "# ";
    StringTokenizer st  = new StringTokenizer(description, " ");
    while(st.hasMoreElements()) {
      line += st.nextElement().toString()+" ";
      if (line.length() >= 60) {
        desc += line + "\n";
        if (st.hasMoreElements()) line = "# ";
      } else if (!st.hasMoreElements())
        desc += line + "\n";
    }    
    
    return desc + stringnifyMatrix();
  }

  
  /** Just to perform some test. It prints the matrix on the screen.
    */
  public void printMatrix()
  {
    // Testausgabe: 
    Iterator rowKeys = rowSymbols.keySet().iterator();
    while(rowKeys.hasNext()) {
      Iterator colKeys = colSymbols.keySet().iterator();
      Symbol rowSym = (Symbol) rowKeys.next();
      System.out.print(rowSym.getName()+"\t");
      while(colKeys.hasNext()) {
        Symbol colSym = (Symbol) colKeys.next();
        int x = ((Integer) rowSymbols.get(rowSym)).intValue();
        int y = ((Integer) colSymbols.get(colSym)).intValue();
        System.out.print(colSym.getName()+" "+" "+x+" "+y+" "+matrix[x][y]+"\t");
      }
      System.out.println("\n");
    }
    System.out.println(toString());
  }

  
  /** Just for tests.
    * @param args
    */
  public static void main(String args[])
  {
    try {
      new SubstitutionMatrix((FiniteAlphabet) AlphabetManager.alphabetForName("PROTEIN-TERM"), new File(args[0]));
    } catch (IllegalSymbolException exc) {
      exc.printStackTrace();
    } catch (IOException exc) {
      exc.printStackTrace();
    } catch (IllegalAlphabetException exc) {
      exc.printStackTrace();
    } catch (BioException exc) {
      exc.printStackTrace();
    }  
  }

  
}
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

/*
 * Created on 03.08.2005
 *
 */
package toolbox.align;

import java.util.LinkedList;
import java.util.List;
import java.util.NoSuchElementException;

import org.biojava.bio.BioException;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.db.SequenceDB;
import org.biojava.bio.symbol.Alignment;

/** This Interface provides methods for the alignment of biosequences.
  *
  * @author Andreas Dr&auml;ger
  *
  */
public interface SequenceAlignment
{
  /**
    * @return a string representation of the alignment
    * @throws BioException
    */
  public String getAlignmentString() throws Exception;

  
  /**
    * @param source a SequenceIterator containing a set of sequences to be aligned with
    * @param subjectDB the SequenceDB containing another set of sequences.
    * @return a list containing the results of all single alignments performed by this
    *   method.
    * @throws NoSuchElementException
    * @throws Exception
    */
  public List alignAll(SequenceIterator source, SequenceDB subjectDB) throws Exception;
  
  
  /** Performs a pairwise sequence alignment of the two given sequences.
    * @param query
    * @param subject
    * @return score of the alignment or the distance.
    * @throws Exception
    */
  public double pairwiseAlignment(Sequence query, Sequence subject) throws Exception;
 
  
  /** This method also performs a sequence alignment of the two given sequences but it
    * returns an Alignment object instead of the score.
    * @param query
    * @param subject
    * @return Alignment
    */
  public Alignment getAlignment(Sequence query, Sequence subject) throws Exception;
  
}
_______________________________________________
Biojava-l mailing list  -  Biojava-l@biojava.org
http://biojava.org/mailman/listinfo/biojava-l

Reply via email to