/*
 *                    BioJava development code
 *
 * This code may be freely distributed and modified under the
 * terms of the GNU Lesser General Public Licence.  This should
 * be distributed with the code.  If you do not have a copy,
 * see:
 *
 *      http://www.gnu.org/copyleft/lesser.html
 *
 * Copyright for this code is held jointly by the individual
 * authors.  These should be listed in @author doc comments.
 *
 * For more information on the BioJava project and its aims,
 * or to join the biojava-l mailing list, visit the home page
 * at:
 *
 *      http://www.biojava.org/
 *
 */


package testbed;

import org.biojava.bio.*;
import org.biojava.utils.*;
import org.biojava.bio.dist.*;
import org.biojava.bio.seq.*;
import org.biojava.bio.symbol.*;
import java.util.*;

/**
 * Title:        CrossProductTest
 * Description:  A test of the nmer alphabet and distribution concepts
 *
 * This program demonstrates the use of crossproduct (nmer) alphabets and
 * distributions. A codon distribution is created from a sequence. This
 * distribution is then used to generate another random sequence. The probability
 * of this new sequence is then calculated. This program also demonstrates
 * how a cross product alphabet may be displayed to STDOUT.
 *
 * Thanks to Matthew and Thomas for hints and suggestions.
 *
 * @author       Mark Schreiber
 * @version 1.0
 */

public class CrossProductTest {

  /** DNA used to train the codon distribution (288 bases, i.e. 96 trimers). */
  private static final String TRAINING_DNA =
      "atgatgatggtggcggaggatgggcgcgcggtggaaacaacaattaca" +
      "tagcaccccataccaatagacacagatggcggtgtgaacagataagac" +
      "gcttagacacaaatgacacacggggccggggaatatttttaaatacaa" +
      "cggctctctttataggcgcgcctttaaatataggcgcgcgcgggccta" +
      "tttataaatatttttagaccacacccatatcatacgacaagaagccat" +
      "ccaaatacggataacacccctagaggggaaccccgttatattttacac";

  /** How many codons are sampled from the trained distribution. */
  private static final int SAMPLE_COUNT = 20;

  /**
   * Emission probability of the generated sequence: starts at 1.0 and is
   * multiplied by the weight of every codon sampled in the constructor.
   */
  double prob = 1.0;

  /**
   * Runs the complete demonstration. A codon (trimer) distribution is
   * trained on a fixed DNA sequence, {@code SAMPLE_COUNT} codons are sampled
   * from it and printed to STDOUT, and the product of their weights is
   * accumulated in {@link #prob} and printed on a final line.
   *
   * @throws NestedException wrapping any exception raised by one of the steps
   */
  public CrossProductTest() throws NestedException {
    try {
      // The codon alphabet is the cross product of three DNA alphabets.
      Alphabet codonAlphabet = AlphabetManager.getCrossProductAlphabet(
          Collections.nCopies(3, DNATools.getDNA()));

      // A distribution over the codon alphabet, with a trainer and its context.
      Distribution codonDist =
          DistributionFactory.DEFAULT.createDistribution(codonAlphabet);
      DistributionTrainer trainer = new SimpleDistributionTrainer(codonDist);
      DistributionTrainerContext trainingContext =
          new SimpleDistributionTrainerContext();

      // The training sequence, and a view of it in windows of three symbols.
      SymbolList dna = DNATools.createDNA(TRAINING_DNA);
      SymbolList codonView = SymbolListViews.windowedSymbolList(dna, 3);

      // Register one count for each trimer in the view.
      for (Iterator codons = codonView.iterator(); codons.hasNext(); ) {
        AtomicSymbol codon = (AtomicSymbol) codons.next();
        trainer.addCount(trainingContext, codon, 1.0);
      }

      // Train from the accumulated counts; a weight of 0.0 means no
      // pseudo-counts are taken from the null model.
      trainer.train(0.0);

      // Sample codons from the distribution to build a new random sequence.
      for (int drawn = 0; drawn < SAMPLE_COUNT; drawn++) {
        Symbol codon = codonDist.sampleSymbol();

        // Print the token of each symbol that makes up the sampled codon.
        List bases = ((BasisSymbol) codon).getSymbols();
        for (Iterator baseIter = bases.iterator(); baseIter.hasNext(); ) {
          Symbol base = (Symbol) baseIter.next();
          System.out.print(base.getToken());
        }

        // Fold this codon's weight into the running emission probability.
        prob = prob * codonDist.getWeight(codon);
      }
      System.out.println("\nProbablity of emission = " + prob);

    } catch (Exception cause) {
      throw new NestedException(cause);
    }
  }

  /**
   * Program entry point: runs the demonstration and, if it fails, prints the
   * stack trace of the resulting NestedException to STDOUT.
   *
   * @param args command-line arguments (not used)
   */
  public static void main(String[] args) {
    try {
      new CrossProductTest();
    } catch (NestedException failure) {
      failure.printStackTrace(System.out);
    }
  }
}