Hello,

I just implemented the Needleman-Wunsch-Algorithm and an object for handling substitution matrices like BLOSSUM, PAM, GONNET and so on. It parses a matrix file and provides a method to get the costs for changing Symbol A to Symbol B. This is realized by two hashes and a private int[][] matrix. If NeedlemanWunsch gets equal weights for gap opening and gap extension it won't consider affine gap penalties, whereas otherwise it will, which needs three times more memory.

Andreas Dräger
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */

/*
 * Created on 01.08.2005
 *
 */
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.Iterator;
import java.util.Map;
import java.util.StringTokenizer;

import org.biojava.bio.BioException;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.AlphabetManager;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.IllegalAlphabetException;
import org.biojava.bio.symbol.IllegalSymbolException;
import org.biojava.bio.symbol.Symbol;

/** This object is able to read a substitution matrix file and constructs an int matrix to
  * store the matrix. Every single element of the matrix can be accessed by the method
  * <code>getValueAt</code> with the parameters being tow biojava symbols. This it why it is
  * not necessary to access the matrix directly. If there is no value for the two specified
  * <code>Symbol</code>s an <code>Exception</code> is thrown.
  *
  * @author Andreas Dr&auml;ger
  *
  */
public class SubstitutionMatrix
{
  protected Map rowSymbols, colSymbols;
  protected int[][] matrix;
  protected int min, max;
  protected FiniteAlphabet alphabet;
  protected String description, name;
  
  /** This constructs a <code>SubstitutionMatrix</code>-object that contains two 
    * <code>Map</code> data structures having biojava symbols as key and the value
    * being the index of the matrix containing the substitution score.
    * 
    * @param alpha the alphabet of the matrix (DNA, RNA or PROTEIN, or PROTEIN-TERM)
    * @param matrixFile the file containing the substitution matrix. Lines starting with
    *   '#' are comments. The line starting with a white space, is the table head. 
    *   Every line has to start with the one letter representation of the Symbol and then 
    *   the values for the exchange.
    * 
    * @throws IOException
    * @throws BioException
    */
  public SubstitutionMatrix(FiniteAlphabet alpha, File matrixFile) throws IOException, BioException
  {
    this.alphabet     = alpha;
    this.description  = "";
    this.name         = matrixFile.getName();
    this.rowSymbols   = new HashMap();
    this.colSymbols   = new HashMap();
    String matString  = "";
    BufferedReader br = new BufferedReader(new FileReader(matrixFile));
    while(br.ready()) 
      matString      += br.readLine() + "\n";
    this.matrix       = this.parseMatrix(matString);
    //this.printMatrix();
  }
  
  
  /** With this constructor it is possible to construct a SubstitutionMatrix object from a
    * substitution matrix file. The given String contains a number of lines separated by 
    * <code>\n</code>. Everything else is the same than for the constructor above.
    * 
    * @param alpha Das FiniteAlphabet, das benutzt werden soll.
    * @param matrixString
    * @param name der Matrix.
    * @throws BioException
    */
  public SubstitutionMatrix(FiniteAlphabet alpha, String matrixString, String name) throws BioException
  {
    this.alphabet    = alpha;
    this.description = "";
    this.name        = name;
    this.rowSymbols  = new HashMap();
    this.colSymbols  = new HashMap();
    this.matrix = this.parseMatrix(matrixString);   
    //this.printMatrix();
  }
  
  
  /** Constructs a SubstitutionMatrix with every Match and every Replace having the same
    * expenses given by the parameters. Ambigious symbols are not considered because there
    * might be to many of them (for proteins).
    *  
    * @param alpha
    * @param match
    * @param replace
    */
  public SubstitutionMatrix(FiniteAlphabet alpha, int match, int replace)
  {
    int i=0, j=0;
    
    this.alphabet = alpha;
    this.description = "Identity matrix. All replaces and all matches are treated equally.";
    this.name = "IDENTITY_"+match+"_"+replace;
    this.rowSymbols = new HashMap();
    this.colSymbols = new HashMap();
    this.matrix = new int[alpha.size()][alpha.size()];
    
    Symbol[] sym = new Symbol[alpha.size()];
    Iterator iter = alpha.iterator();

    for (i=0; iter.hasNext(); i++) {
      sym[i] = (Symbol) iter.next();
      rowSymbols.put(sym[i], Integer.valueOf(i));
      colSymbols.put(sym[i], Integer.valueOf(i));
    }   
    
    for(i=0; i<alphabet.size(); i++)
      for(j=0; j<alphabet.size(); j++) 
        if (sym[i].getMatches().contains(sym[j])) matrix[i][j] = match;
        else matrix[i][j] = replace;
    
    //this.printMatrix();
  }
  
  
  /** Reads a String representing the contents of a substitution matrix file.
    *  
    * @param matString
    * @return matrix
    * @throws BioException
    */
  protected int[][] parseMatrix(String matString) throws BioException
  {
    int i = 0, j = 0, rows = 0, cols = 0;
    StringTokenizer br, st;
    SymbolTokenization symtok = alphabet.getTokenization("token");

    this.min = Integer.MAX_VALUE;
    this.max = Integer.MIN_VALUE;
    /* First: count how many elements are in the matrix
     * fill lines and rows
     */
    br = new StringTokenizer(matString, "\n");
    while (br.hasMoreElements()) {
      String line = br.nextElement().toString();
      if (line.startsWith("#")) {
        description += line.substring(1);
        continue;
      } else if (line.startsWith(" ")) {
        st = new StringTokenizer(line, " ");
        for (j=0; st.hasMoreElements(); j++) { 
          colSymbols.put(symtok.parseToken(st.nextElement().toString()), Integer.valueOf(j));
        }
        cols = j;
      } else if (!line.startsWith("\n")) { // the matrix.
        st = new StringTokenizer(line, " ");
        if (st.hasMoreElements()) 
          rowSymbols.put(symtok.parseToken(st.nextElement().toString()), Integer.valueOf(rows++));
      }
    }
    
    int[][] matrix = new int[rows][cols];
    
    rows = 0;
    br = new StringTokenizer(matString, "\n");
    /* Second reading. Fill the matrix.
     */
    while (br.hasMoreElements())
    {
      String line = br.nextElement().toString();
      if      (line.startsWith("#")) continue;
      else if (line.startsWith(" ")) continue;
      else if (!line.startsWith("\n")) { // lines:
        st = new StringTokenizer(line, " ");
        if (st.hasMoreElements()) st.nextElement(); // throw away Symbol at beginning.
        for (j=0; st.hasMoreElements(); j++) {// cols:
          matrix[rows][j] = Integer.parseInt(st.nextElement().toString());
          if (matrix[rows][j] > max) max = matrix[rows][j]; // Maximum.
          if (matrix[rows][j] < min) min = matrix[rows][j]; // Minimum.
        }
        rows++;
      }
    }
    
    return matrix;
  }
  
  
  /** There are some substitution matrices containing more columns than lines. This
    * has to do with the ambigious symbols. Lines are always good, columns might not
    * contain the whole information. The matrix is supposed to be symmetric anyway, so
    * you can always set the ambigious symbol to be the first argument.
    * @param row Symbol of the line
    * @param col Symbol of the column
    * @return expenses for the exchange of symbol row and symbol col.
    * @throws BioException
    */
  public int getValueAt(Symbol row, Symbol col) throws BioException
  {
    if ((!rowSymbols.containsKey(row)) || (!colSymbols.containsKey(col)))
      throw new BioException("No entry for the sybols "+row.getName()+" and "+col.getName());
    return matrix[((Integer) rowSymbols.get(row)).intValue()][((Integer) colSymbols.get(col)).intValue()];
  }
  
  
  /**
    * @return the comment of the matrix
    */
  public String getDescription()
  {
    return description;
  }
  
  
  /**
    * @return the name of the matrix.
    */
  public String getName()
  {
    return name;
  }
  
  
  /**
    * @return minimum of the matrix.
    */
  public int getMin()
  {
    return min;
  }
  
  
  /**
    * 
    * @return maximum of the matrix.
    */
  public int getMax()
  {
    return max;
  }
  
  
  /** Sets the description to the given value.
    * 
    * @param desc a description. This doesn't have to start with '#'.
    */
  public void setDescription(String desc)
  {
    this.description = desc;
  }
  
  
  /** 
    * @return the alphabet of this matrix.
    */
  public FiniteAlphabet getAlphabet()
  {
    return alphabet;
  }
  
  
  /**
    * @return a string representation of this matrix without the description.
    */
  public String stringnifyMatrix()
  {
    int i = 0;
    String matrixString = "";
    Symbol[] colSyms    = new Symbol[this.colSymbols.keySet().size()];
    
    try {
      SymbolTokenization symtok = alphabet.getTokenization("default");  
      matrixString += "  ";
      Iterator colKeys = colSymbols.keySet().iterator();
      while (colKeys.hasNext()) {
        colSyms[i]    = (Symbol) colKeys.next(); 
        matrixString += symtok.tokenizeSymbol(colSyms[i++]).toUpperCase() + " ";
      }
      matrixString += "\n";
    
      Iterator rowKeys = rowSymbols.keySet().iterator();
      while (rowKeys.hasNext()) {
        Symbol rowSym = (Symbol) rowKeys.next();
        matrixString += symtok.tokenizeSymbol(rowSym).toUpperCase() + " ";
        for (i=0; i<colSyms.length; i++) {
          matrixString += getValueAt(rowSym, colSyms[i]) + " ";
        }
        matrixString += "\n";
      }  
    } catch (BioException exc) {
      exc.printStackTrace();
    }
    
    return matrixString;  
  }
  
  
  /**   
    * @return Gives a description with approximately 60 letters on every line 
    *   separated by <code>\n</code>. Every line starts with <code>#</code>.
    */
  public String stringnifyDescription()
  {
    String desc = "", line = "# ";
    StringTokenizer st  = new StringTokenizer(description, " ");
    while(st.hasMoreElements()) {
      line += st.nextElement().toString()+" ";
      if (line.length() >= 60) {
        desc += line + "\n";
        if (st.hasMoreElements()) line = "# ";
      } else if (!st.hasMoreElements())
        desc += line + "\n";
    }    
    return desc;
  }
  
  
  /** Overides the inherited method.
    * @return Gives a string representation of the SubstitutionMatrix. 
    *   This is a valid input for the constructor which needs a matrix string. This String
    *   also contains the description of the matrix if there is one.
    */
  public String toString()
  {
    String desc = "", line = "# ";
    StringTokenizer st  = new StringTokenizer(description, " ");
    while(st.hasMoreElements()) {
      line += st.nextElement().toString()+" ";
      if (line.length() >= 60) {
        desc += line + "\n";
        if (st.hasMoreElements()) line = "# ";
      } else if (!st.hasMoreElements())
        desc += line + "\n";
    }    
    
    return desc + stringnifyMatrix();
  }

  
  /** Just to perform some test. It prints the matrix on the screen.
    */
  public void printMatrix()
  {
    // Testausgabe: 
    Iterator rowKeys = rowSymbols.keySet().iterator();
    while(rowKeys.hasNext()) {
      Iterator colKeys = colSymbols.keySet().iterator();
      Symbol rowSym = (Symbol) rowKeys.next();
      System.out.print(rowSym.getName()+"\t");
      while(colKeys.hasNext()) {
        Symbol colSym = (Symbol) colKeys.next();
        int x = ((Integer) rowSymbols.get(rowSym)).intValue();
        int y = ((Integer) colSymbols.get(colSym)).intValue();
        System.out.print(colSym.getName()+" "+" "+x+" "+y+" "+matrix[x][y]+"\t");
      }
      System.out.println("\n");
    }
    System.out.println(toString());
  }

  
  /** Just for tests.
    * @param args
    */
  public static void main(String args[])
  {
    try {
      new SubstitutionMatrix((FiniteAlphabet) AlphabetManager.alphabetForName("PROTEIN-TERM"), new File(args[0]));
    } catch (IllegalSymbolException exc) {
      exc.printStackTrace();
    } catch (IOException exc) {
      exc.printStackTrace();
    } catch (IllegalAlphabetException exc) {
      exc.printStackTrace();
    } catch (BioException exc) {
      exc.printStackTrace();
    }  
  }

  
}
/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */


import java.io.BufferedReader;
import java.io.File;
import java.io.FileInputStream;
import java.io.FileNotFoundException;
import java.io.FileReader;
import java.io.IOException;
import java.util.HashMap;
import java.util.LinkedList;
import java.util.List;
import java.util.Map;
import java.util.NoSuchElementException;

import org.biojava.bio.BioException;
import org.biojava.bio.seq.DNATools;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.db.SequenceDB;
import org.biojava.bio.seq.impl.SimpleGappedSequence;
import org.biojava.bio.seq.impl.SimpleSequence;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.Alignment;
import org.biojava.bio.symbol.AlphabetManager;
import org.biojava.bio.symbol.FiniteAlphabet;
import org.biojava.bio.symbol.SimpleAlignment;
import org.biojava.bio.symbol.SimpleSymbolList;

/*
 * Created on 23.06.2005
 */

/**
  * @author Andreas Dr&auml;ger
  */

public class NeedlemanWunsch
{

  protected double[][] CostMatrix;
  protected FiniteAlphabet alpha;
  protected SubstitutionMatrix subMatrix;
  private Alignment pairalign;
  private double insert;
  private double delete;
  private double gapExt;
  private String alignment;
  private double match;
  private double replace;

  
  /** Just for some tests.
    * @param args
    */
  public static void main (String args[])
  {
  	if (args.length < 3)
        throw new Error("Usage: NeedlemanWunsch sourceSeqFile targetSeqFile substitutionMatrixFile");
    try {
      FiniteAlphabet alphabet = (FiniteAlphabet) AlphabetManager.alphabetForName("DNA");
      SubstitutionMatrix matrix = new SubstitutionMatrix(alphabet, new File(args[2]));
      SequenceAlignment aligner = new NeedlemanWunsch( 
			alphabet, 
			2,  	// insert
			2,	// delete
			1, 	// gapExtend
			0, 	// match
			3,	// replace
			matrix 	// SubstitutionMatrix
		);
      aligner.alignAll(
        // sources
	SeqIOTools.readFastaDNA(new BufferedReader(new FileReader(args[0]))),
	// sequenceDB
	SeqIOTools.readFasta(new FileInputStream(new File(args[1])), DNATools.getDNA()));
    } catch (NoSuchElementException exc) {
      exc.printStackTrace();
    } catch (FileNotFoundException exc) {
      exc.printStackTrace();
    } catch (BioException exc) {
      exc.printStackTrace();
    } catch (IOException exc) {
      exc.printStackTrace();
    } catch (Exception exc) {
      exc.printStackTrace();
    }
  }

  /** Constructs a new Object with the given parameters based on the Needleman-Wunsch algorithm
    * @param alpha
    * @param insert
    * @param delete
    * @param gapExtend
    * @param subMat
    */
  public NeedlemanWunsch(FiniteAlphabet alpha, double insert, double delete, double gapExtend, double match, double replace, SubstitutionMatrix subMat)
  {
    this.alpha = alpha;
    this.subMatrix = subMat;
    this.alpha = subMatrix.getAlphabet();
    this.insert = insert;
    this.delete = delete;
    this.gapExt = gapExtend;  
    this.match  = match;
    this.replace = replace;
    this.alignment = "";
  }

  
  /** Prints a String representation of the CostMatrix for the given Alignment on the screen.
    * @param queryChar
    * @param targetChar
    * @return
    */
  public static String printCostMatrix (double[][] CostMatrix, char[] queryChar, char[] targetChar)
  {
    int line, col;
    String output = "\t";
   
    for (col=0; col<=targetChar.length; col++)
      if (col==0) output += "["+col+"]\t";
      else output += "["+targetChar[col-1]+"]\t";
    for (line=0; line<=queryChar.length; line++)
    {
      if (line==0) output += "\n["+line+"]\t";
      else output += "\n["+queryChar[line-1]+"]\t";
      for (col=0; col<=targetChar.length; col++)
        output += CostMatrix[line][col]+"\t";
    }
    output += "\ndelta[Edit] = "+CostMatrix[line-1][col-1]+"\n";
    return output;
  }

 
  /** Computes the optimal alignment of the two sequences and constructs a String
    * representing the alignment as well as a new Alignment object, which can be
    * used in different ways by BioJava.
    *  
    * @param query
    * @param target
    * @return alignment string representation
    */
  private String optimalAlignment(Sequence query, Sequence target) throws BioException
  {
    /*StringBuffer[] align = new StringBuffer[] {new StringBuffer(CostMatrix.length), new StringBuffer(CostMatrix[0].length)};
    StringBuffer path = new StringBuffer("");//*/
    
    String[] align = new String[] {"", ""};
    String path = "";//*/ 
    
    String output = "";
    int j = this.CostMatrix[CostMatrix.length - 1].length -1;
    SymbolTokenization st = alpha.getTokenization("default");

    for (int i = this.CostMatrix.length - 1; i>0; )
    {
      do { 
        // From now only Insert possible.
        if (i == 0) {
          align[0] = '~'     + align[0]; // st.tokenizeSymbol(alpha.getGapSymbol()) + align[0]; //
          align[1] = st.tokenizeSymbol(target.symbolAt(j--)) + align[1];
          path = ' ' + path;//*/
          
        // From now only Delete possible.
        } else if (j == 0) { 
          align[0] = st.tokenizeSymbol(query.symbolAt(i--))  + align[0];
          align[1] = '~'     + align[1]; // st.tokenizeSymbol(alpha.getGapSymbol()) + align[1]; //
          path = ' ' + path;//*/
          
        // Match/Replace
        } else if (CostMatrix[i-1][j-1] == min(CostMatrix[i][j-1], CostMatrix[i-1][j-1], CostMatrix[i-1][j]))  { 
          align[0] = st.tokenizeSymbol(query.symbolAt(i--)) + align[0];
          align[1] = st.tokenizeSymbol(target.symbolAt(j--)) + align[1];//*/
          if (query.symbolAt(i+1) == target.symbolAt(j+1))
            path = '|' + path;//*/ 
          else path = ' ' + path;//*/
          
        // Insert
        } else if ((CostMatrix[i][j-1] < CostMatrix[i-1][j-1]) && (CostMatrix[i][j-1] < CostMatrix[i-1][j])) {
          align[0] = '-'     + align[0]; // st.tokenizeSymbol(alpha.getGapSymbol()) + align[0]; //
          align[1] = st.tokenizeSymbol(target.symbolAt(j--)) + align[1];
          path = ' ' + path;//*/
  
        // Delete
        } else { 
          align[0] = st.tokenizeSymbol(query.symbolAt(i--))  + align[0];
          align[1] = '-'     + align[1]; // st.tokenizeSymbol(alpha.getGapSymbol()) + align[1]; //
          path = ' ' + path;//*/
        }
      } while (j>0);
    }

    // construct the biojava Alignment object:    
    query = new SimpleGappedSequence(
      new SimpleSequence(
        new SimpleSymbolList(query.getAlphabet().getTokenization("token"), align[0]), 
        query.getURN(), 
        query.getName(), 
        query.getAnnotation()));
    target = new SimpleGappedSequence(
      new SimpleSequence(
        new SimpleSymbolList(target.getAlphabet().getTokenization("token"), align[1]),
        target.getURN(),
        target.getName(),
        target.getAnnotation()));
    Map m = new HashMap();
    m.put(query.getName(), query);
    m.put(target.getName(), target);
    pairalign = new SimpleAlignment(m);
        
    output += "Length:\t"+align[0].length()+"\n";
    output += "Score:\t\t"+(-1)*getEditDistance()+"\n\n";
        
    int currline = Math.min(60, align[0].length()); 
    // counts the absoulute position in the String.

    output += "\nQuery:\t"  + align[0].substring(0, currline); 
    output += "\n\t"        + path.substring(0, currline);
    output += "\nTarget:\t" + align[1].substring(0, currline)+"\n";
    
    for (; currline+60 < align[0].length(); currline+=60) {
      output += "\nQuery:\t"  + align[0].substring(currline, currline+60);
      output += "\n\t"        + path.substring(currline, currline+60);
      output += "\nTarget:\t" + align[1].substring(currline, currline+60)+"\n";
    }
    if (currline+1 < align[0].length()) {
      output += "\nQuery:\t"  + align[0].substring(currline, align[0].length());
      output += "\n\t"        + path.substring(currline, path.length());
      output += "\nTarget:\t" + align[1].substring(currline, align[1].length())+"\n";
    }

    output += "\n\n";
 
    return output;    
  }
  
  /** prints the alignment String on the screen.
    * @param align
    */
  public static void printAlignment(String align)
  { 
    System.out.print(align);
  }
  
  
  /**
    * @return Alignment object containing the two gapped sequences constructed from query and target.
    * @throws Exception
    */
  public Alignment getAlignment(Sequence query, Sequence target) throws Exception
  {
    pairwiseAlignment(query, target);
    return pairalign;  
  }
  
  /** 
    * @return returns the edit_distance computed with the given parameters.
    */
  public double getEditDistance()
  {
    return CostMatrix[CostMatrix.length-1][CostMatrix[CostMatrix.length-1].length - 1];
  }

  
  /** 
    * @param x
    * @param y
    * @param z
    * @return Gives the minimum of three doubles
    */
  protected static double min (double x, double y, double z)
  {
    if ((x < y) && (x < z)) return x;
    if (y < z) return y;
    return z;
  }


  /* (non-Javadoc)
   * @see toolbox.align.SequenceAlignment#getAlignment()
   */
  public String getAlignmentString() throws BioException
  {
    return alignment;
  }
  
  
  /* (non-Javadoc)
   * @see toolbox.align.SequenceAlignment#alignAll(org.biojava.bio.seq.SequenceIterator, org.biojava.bio.seq.db.SequenceDB)
   */
  public List alignAll(SequenceIterator source, SequenceDB subjectDB) throws NoSuchElementException, BioException
  {
    List l = new LinkedList();
    while (source.hasNext()) {
  	  Sequence query = source.nextSequence();
  	  // compare all the sequences of both sets.
  	  SequenceIterator target = subjectDB.sequenceIterator();
      while (target.hasNext()) 
        try {
          l.add(getAlignment(query, target.nextSequence()));
          //pairwiseAlignment(query, target.nextSequence());
        } catch (Exception exc) {
          exc.printStackTrace();
        }
    }
    return l;
  }


  /** Global pairwise sequence alginment of two BioJava-Sequence objects according to the
    * Needleman-Wunsch-algorithm. 
    *
    * @see toolbox.align.SequenceAlignment#pairAlign(org.biojava.bio.seq.Sequence, org.biojava.bio.seq.Sequence)
    */
  public double pairwiseAlignment(Sequence query, Sequence subject)
  {
    long time1 = System.currentTimeMillis();
    
	int   i, j;
    double matchReplace;
    this.CostMatrix   = new double[query.length()+1][subject.length()+1]; // Matrix CostMatrix

    // construct the matrix:
    CostMatrix[0][0] = 0;
    
    /* If we want to have affine gap penalties, we have to initialise additional matrices:
     * If this is not necessary, we won't do that (because it's expensive).
     */
    if ((gapExt != delete) || (gapExt != insert)) {
      
      double[][] E = new double[query.length()+1][subject.length()+1];	// Inserts
      double[][] F = new double[query.length()+1][subject.length()+1];	// Deletes
      double[][] G = new double[query.length()+1][subject.length()+1];	// Match/Replace
      
      G[0][0] = 0;
      E[0][0] = F[0][0]   = Double.MAX_VALUE;
      for (i=1; i<=query.length();   i++) {
        CostMatrix[i][0]  = CostMatrix[i-1][0] + delete;
        G[i][0] = E[i][0] = Double.MAX_VALUE;
        F[i][0] = delete;
      }
      for (j=1; j<=subject.length(); j++) {
        CostMatrix[0][j]  = CostMatrix[0][j-1] + insert;
        G[0][j] = F[0][j] = Double.MAX_VALUE;
        E[0][j] = insert;
      }
      for (i=1; i<=query.length();   i++)
        for (j=1; j<=subject.length(); j++)
        {
          try {
            matchReplace = subMatrix.getValueAt(query.symbolAt(i), subject.symbolAt(j));
          } catch (IndexOutOfBoundsException exc) {
            exc.printStackTrace();
          } catch (BioException exc) {
            System.err.println(exc.getMessage());
          } finally {
            if (query.symbolAt(i).getMatches().contains(subject.symbolAt(j)) ||
                subject.symbolAt(j).getMatches().contains(query.symbolAt(i)))
              matchReplace = match; //0;
            else matchReplace = replace; //delete/2 + insert/2;
          }
          G[i][j] = CostMatrix[i-1][j-1] - matchReplace;
          E[i][j] = Math.min(E[i][j-1], CostMatrix[i][j-1] + insert) + gapExt;
          F[i][j] = Math.min(F[i-1][j], CostMatrix[i-1][j] + delete) + gapExt;
          CostMatrix[i][j] = min (E[i][j], F[i][j], G[i][j]);
        }
      
    } else {
      /*
       * No affine gap penalties, constant gap penalties, which is much faster and needs less memory.
       */
      for (i=1; i<=query.length();   i++) CostMatrix[i][0] = CostMatrix[i-1][0] + delete;
      for (j=1; j<=subject.length(); j++) CostMatrix[0][j] = CostMatrix[0][j-1] + insert;
      for (i=1; i<=query.length();   i++)
        for (j=1; j<=subject.length(); j++)
        {        
          try {
            matchReplace = subMatrix.getValueAt(query.symbolAt(i), subject.symbolAt(j));
          } catch (IndexOutOfBoundsException exc) {
            exc.printStackTrace();
          } catch (BioException exc) {
            System.err.println(exc.getMessage());
          } finally {
            if (query.symbolAt(i).getMatches().contains(subject.symbolAt(j)) ||
                subject.symbolAt(j).getMatches().contains(query.symbolAt(i)))
              matchReplace = match; //0;
            else matchReplace = replace; //delete/2 + insert/2;
          }
          CostMatrix[i][j] = min (
            CostMatrix[i-1][j]   + delete, 
            CostMatrix[i][j-1]   + insert, 
            CostMatrix[i-1][j-1] - matchReplace);
        }
    } 
    // Everything is the same from here:
    try {
      //this.printCostMatrix(query.seqString().toCharArray(), target.seqString().toCharArray());
      // generate "String" with optimal Alignment.
      this.alignment += this.optimalAlignment(query, subject) + "\n";
      // print the Alignment on the screen
      this.printAlignment(alignment);		
    } catch (BioException exc) {
      exc.printStackTrace();
    }    
    
    time1 = System.currentTimeMillis() - time1;
    System.out.println("Zeit:\t"+time1);
    
    return getEditDistance();
  }


}
_______________________________________________
Biojava-l mailing list  -  Biojava-l@biojava.org
http://biojava.org/mailman/listinfo/biojava-l

Reply via email to