While working with FlexibleAlignment I've found a problem: when an overlap is empty, the underlying code in AbstractULAlignment returns nulls for positions that contain no symbols, and this confuses things later on. The example program below demonstrates a simple case of the problem (code adapted from TestSimpleAlignment.java).
Is there a symbol that should be returned for this case? It should be distinct from the gap symbol, as areas outside the aligning sequence aren't gap space; they just don't exist. Some comments in the list archive suggest that a space was going to be returned, but I can't see an attempt to implement that (it's not added to the alphabet, as an example) and right now a null is returned.

package symbol;

import java.util.ArrayList;
import java.util.Iterator;

import org.biojava.bio.alignment.FlexibleAlignment;
import org.biojava.bio.alignment.SimpleAlignmentElement;
import org.biojava.bio.symbol.RangeLocation;
import org.biojava.bio.symbol.SymbolList;

/**
 * Small driver that builds a FlexibleAlignment from a 30-position reference
 * plus three 10-symbol sequences placed at 1-10, 11-20 and 21-30, then prints
 * every row followed by every column of the alignment.
 *
 * The three added sequences never share a column, which is the situation the
 * report is about.
 *
 * NOTE(review): Tools is not imported, so it is presumably a helper class in
 * this package -- confirm before compiling standalone.
 */
public class TestFlexibleAlignment {

  /**
   * Assembles the alignment and dumps it to standard output. Any exception is
   * written to standard error and the JVM exits with status 1.
   *
   * @param args ignored
   */
  public static void main(String[] args) {
    try {
      // Three 10-symbol sequences ("random" according to the original note).
      SymbolList pigSeq = Tools.createSymbolList(10);
      SymbolList dogSeq = Tools.createSymbolList(10);
      SymbolList catSeq = Tools.createSymbolList(10);

      // Row labels for the three sequences.
      String pigLabel = "pigs";
      String dogLabel = "dogs";
      String catLabel = "cats";

      // The alignment is seeded with a single element: the reference row.
      ArrayList seedRows = new ArrayList(1);
      SymbolList reference = Tools.createSymbolList(30);
      RangeLocation wholeReference = new RangeLocation(1, 30);
      seedRows.add(new SimpleAlignmentElement("reference", reference, wholeReference));

      FlexibleAlignment alignment = new FlexibleAlignment(seedRows);

      // Place each sequence on its own third of the reference.
      alignment.addSequence(
          new SimpleAlignmentElement(pigLabel, pigSeq, new RangeLocation(1, 10)));
      alignment.addSequence(
          new SimpleAlignmentElement(dogLabel, dogSeq, new RangeLocation(11, 20)));
      alignment.addSequence(
          new SimpleAlignmentElement(catLabel, catSeq, new RangeLocation(21, 30)));

      // Rows: one line per label, with that row's sequence string.
      System.out.println("Sequences in alignment");
      Iterator labels = alignment.getLabels().iterator();
      while (labels.hasNext()) {
        String rowLabel = (String) labels.next();
        String rowText = alignment.symbolListForLabel(rowLabel).seqString();
        System.out.println(rowLabel + ":\t" + rowText);
      }
      System.out.flush();

      // Columns: positions are 1-based and run through alignment.length().
      System.out.println("Columns");
      int column = 1;
      while (column <= alignment.length()) {
        String columnName = alignment.symbolAt(column).getName();
        System.out.println(column + ":\t" + columnName);
        column++;
      }
    } catch (Exception ex) {
      ex.printStackTrace(System.err);
      System.exit(1);
    }
  }
}

_______________________________________________
Biojava-l mailing list - Biojava-l@biojava.org
http://biojava.org/mailman/listinfo/biojava-l