I wrote the attached location parser JUnit test case while trying some alternative location representations. The test case parses short EMBL record strings and checks that the parsed Location object is what was expected. Please use it in BioJava if it's suitable.
Regards, Brian __________________________________________________ Do You Yahoo!? HotJobs - Search Thousands of New Jobs http://www.hotjobs.com
/* * BioJava development code * * This code may be freely distributed and modified under the * terms of the GNU Lesser General Public Licence. This should * be distributed with the code. If you do not have a copy, * see: * * http://www.gnu.org/copyleft/lesser.html * * Copyright for this code is held jointly by the individual * authors. These should be listed in @author doc comments. * * For more information on the BioJava project and its aims, * or to join the biojava-l mailing list, visit the home page * at: * * http://www.biojava.org/ * */ package org.biojava.bio.seq.io; import java.io.*; import java.util.Iterator; import junit.framework.*; import org.biojava.bio.seq.Feature; import org.biojava.bio.seq.Sequence; import org.biojava.bio.seq.SequenceIterator; import org.biojava.bio.seq.StrandedFeature; import org.biojava.bio.seq.io.SeqIOTools; import org.biojava.bio.symbol.FuzzyLocation; import org.biojava.bio.symbol.Location; import org.biojava.bio.symbol.PointLocation; /** * Test parsing of GenBank/EMBL/DDBJ location strings. Test reads * example records from a StringReader. * * @author Brian King ([EMAIL PROTECTED], [EMAIL PROTECTED]) */ public class LocationParserTest extends TestCase { /** * Constructor. * * @param name The name of the test case * @see TestCase */ public LocationParserTest(String name) { super(name); } /** * Runs the unit tests defined here. */ public static void main(String args[]) { junit.textui.TestRunner.run(LocationParserTest.class); } /** * Test parse of a point location: "variation 1896" * */ public void testPoint() throws Exception { // create initial data model from record in EMBL format // extracted from AB000360. 
String doc = "ID AB000360 standard; DNA; HUM; 2582 BP.\n" + "AC AB000360 \n" + // gives Sequence object a URN "FH Key Location/Qualifiers\n" + "FH \n" + "FT variation 1896\n" + "//"; BufferedReader r = new BufferedReader(new StringReader(doc)); SequenceIterator seqItr = SeqIOTools.readEmbl(r); Sequence seq = seqItr.nextSequence(); Iterator itr = seq.features(); StrandedFeature f = (StrandedFeature) itr.next(); PointLocation loc = (PointLocation) f.getLocation(); assertEquals("Feature should be in POSITIVE orientation.", StrandedFeature.POSITIVE, f.getStrand()); if (!loc.isContiguous()) { fail("Location should be contiguous"); // can't use JUnit assert() because "assert" is a JDK 1.4 keyword } assertEquals("Start of point incorrect.", 1896, // expected loc.getMin()); // actual assertEquals("End of point incorrect.", 1896, // expected loc.getMax()); // actual } /** * Test the most common case (start..end) */ public void testRange() throws Exception { // create initial data model from record in EMBL format // String doc = "ID ABCDEFGH standard; DNA; HUM; 4238 BP.\n" + "AC ABCDEFGH \n" + // gives Sequence object a URN "FH Key Location/Qualifiers\n" + "FH \n" + "FT polyA_signal 4183..4188\n" + "//"; BufferedReader r = new BufferedReader(new StringReader(doc)); SequenceIterator seqItr = SeqIOTools.readEmbl(r); Sequence seq = seqItr.nextSequence(); Iterator itr = seq.features(); StrandedFeature f = (StrandedFeature) itr.next(); Location loc = f.getLocation(); assertEquals("Feature should be in POSITIVE orientation.", StrandedFeature.POSITIVE, f.getStrand()); if (!loc.isContiguous()) { fail("Location should be contiguous"); // can't use JUnit assert() because "assert" is a JDK 1.4 keyword } assertEquals("Start of range incorrect.", 4183, // expected loc.getMin()); // actual assertEquals("End of range incorrect.", 4188, // expected loc.getMax()); // actual } /** * <1..99 * */ public void testFuzzy() throws Exception { // create initial data model from record in EMBL format 
// String doc = "ID ABCDEFGH standard; DNA; HUM; 4238 BP.\n" + "AC ABCDEFGH \n" + // gives Sequence object a URN "FH Key Location/Qualifiers\n" + "FH \n" + "FT intron <1..99\n" + "//"; BufferedReader r = new BufferedReader(new StringReader(doc)); SequenceIterator seqItr = SeqIOTools.readEmbl(r); Sequence seq = seqItr.nextSequence(); Iterator itr = seq.features(); StrandedFeature f = (StrandedFeature) itr.next(); FuzzyLocation loc = (FuzzyLocation) f.getLocation(); assertEquals("Feature should be in POSITIVE orientation.", StrandedFeature.POSITIVE, f.getStrand()); if (!loc.isContiguous()) { fail("Location should be contiguous"); // can't use JUnit assert() because "assert" is a JDK 1.4 keyword } if (!loc.isMinFuzzy()) { fail("Location min should be fuzzy"); } if (loc.isMaxFuzzy()) { fail("Location max should not be fuzzy"); } assertEquals("Start of range incorrect.", 1, // expected loc.getMin()); // actual assertEquals("End of range incorrect.", 99, // expected loc.getMax()); // actual } /** * join(2762..2959,3175..3319) * */ public void testJoin() throws Exception { // create initial data model from record in EMBL format // String doc = "ID ABCDEFGH standard; DNA; HUM; 4238 BP.\n" + "AC ABCDEFGH \n" + // gives Sequence object a URN "FH Key Location/Qualifiers\n" + "FH \n" + "FT CDS join(2762..2959,3175..3319)\n" + "//"; BufferedReader r = new BufferedReader(new StringReader(doc)); SequenceIterator seqItr = SeqIOTools.readEmbl(r); Sequence seq = seqItr.nextSequence(); Iterator itr = seq.features(); StrandedFeature f = (StrandedFeature) itr.next(); Location loc = f.getLocation(); assertEquals("Feature should be in POSITIVE orientation.", StrandedFeature.POSITIVE, f.getStrand()); if (loc.isContiguous()) { fail("Location should not be contiguous"); // can't use JUnit assert() because "assert" is a JDK 1.4 keyword } // test each region // itr = loc.blockIterator(); loc = (Location) itr.next(); assertEquals("Start of first region of join incorrect.", 2762, // expected 
loc.getMin()); // actual assertEquals("End of first region of join incorrect.", 2959, // expected loc.getMax()); // actual loc = (Location) itr.next(); assertEquals("Start of second region of join incorrect.", 3175, // expected loc.getMin()); // actual assertEquals("End of second region of join incorrect.", 3319, // expected loc.getMax()); // actual } /** * complement(4183..4188) * */ public void testComplementRange() throws Exception { // create initial data model from record in EMBL format // String doc = "ID ABCDEFGH standard; DNA; HUM; 4238 BP.\n" + "AC ABCDEFGH \n" + // gives Sequence object a URN "FH Key Location/Qualifiers\n" + "FH \n" + "FT polyA_signal complement(4183..4188)\n" + "//"; BufferedReader r = new BufferedReader(new StringReader(doc)); SequenceIterator seqItr = SeqIOTools.readEmbl(r); Sequence seq = seqItr.nextSequence(); Iterator itr = seq.features(); StrandedFeature f = (StrandedFeature) itr.next(); Location loc = f.getLocation(); assertEquals("Feature should be in NEGATIVE orientation.", StrandedFeature.NEGATIVE, f.getStrand()); if (!loc.isContiguous()) { fail("Location should be contiguous"); // can't use JUnit assert() because "assert" is a JDK 1.4 keyword } assertEquals("Start of range incorrect.", 4183, // expected loc.getMin()); // actual assertEquals("End of range incorrect.", 4188, // expected loc.getMax()); // actual } /** * Test for parsing of joins of remote locations * */ public void testRemoteJoin() throws Exception { // create initial data model. 
Extracted from EMBL record // AB002461 // String doc = "ID AB002461 standard; DNA; HUM; 4238 BP.\n" + "AC AB002461; \n" + "FH Key Location/Qualifiers\n" + "FH \n" + "FT CDS join(AB002455.1:2762..2959,AB002456.1:175..319,\n" + "FT AB002456.1:1517..1570,AB002456.1:1661..1788,\n" + "FT AB002457.1:449..570,AB002458.1:284..554,\n" + "FT AB002459.1:309..375,AB002459.1:555..684,\n" + "FT AB002460.1:446..579,AB002460.1:672..736,100..182,602..767)\n" + "//"; BufferedReader r = new BufferedReader(new StringReader(doc)); SequenceIterator seqItr = SeqIOTools.readEmbl(r); Sequence seq = seqItr.nextSequence(); Iterator itr = seq.features(); // SimpleRemoteFeature Feature f = (Feature) itr.next(); Location loc = f.getLocation(); /* // remote joins don't have a strand assertEquals("Feature should be in POSITIVE orientation.", StrandedFeature.POSITIVE, f.getStrand()); */ if (loc.isContiguous()) { fail("Location should not be contiguous"); // can't use JUnit assert() because "assert" is a JDK 1.4 keyword } // test each region // the given join has two local regions itr = loc.blockIterator(); loc = (Location) itr.next(); assertEquals("Start of first region of join incorrect.", 100, // expected loc.getMin()); // actual assertEquals("End of first region of join incorrect.", 182, // expected loc.getMax()); // actual loc = (Location) itr.next(); assertEquals("Start of second region of join incorrect.", 602, // expected loc.getMin()); // actual assertEquals("End of second region of join incorrect.", 767, // expected loc.getMax()); // actual } /** * Test complement(join(2762..2959,3175..3319)) * */ public void testComplementJoin() throws Exception { // create initial data model from record in EMBL format // String doc = "ID ABCDEFGH standard; DNA; HUM; 4238 BP.\n" + "AC ABCDEFGH \n" + // gives Sequence object a URN "FH Key Location/Qualifiers\n" + "FH \n" + "FT CDS complement(join(2762..2959,3175..3319))\n" + "//"; BufferedReader r = new BufferedReader(new StringReader(doc)); 
SequenceIterator seqItr = SeqIOTools.readEmbl(r); Sequence seq = seqItr.nextSequence(); Iterator itr = seq.features(); StrandedFeature f = (StrandedFeature) itr.next(); Location loc = f.getLocation(); assertEquals("Feature should be in NEGATIVE orientation.", StrandedFeature.NEGATIVE, f.getStrand()); if (loc.isContiguous()) { fail("Location should not be contiguous"); } // test each region // itr = loc.blockIterator(); loc = (Location) itr.next(); assertEquals("Start of first region of join incorrect.", 2762, // expected loc.getMin()); // actual assertEquals("End of first region of join incorrect.", 2959, // expected loc.getMax()); // actual loc = (Location) itr.next(); assertEquals("Start of second region of join incorrect.", 3175, // expected loc.getMin()); // actual assertEquals("End of second region of join incorrect.", 3319, // expected loc.getMax()); // actual } }
