Thanks - give me a few days and I'll put this up. If I don't, please nag me :)
- Mark facemann <[EMAIL PROTECTED]> Sent by: [EMAIL PROTECTED] 03/06/2004 11:09 AM To: [EMAIL PROTECTED] cc: Subject: [Biojava-l] Reading frames and amino acids Here is a small contribution. I use it to find simple motifs. Feel free to edit or scrap. /** *MotifLister.java *Andy Hammer *08 Aug 2003 *Lists all instances of a motif in specified (dna\rna\protein) fasta file. *The motif can contain Ambiguity symbols *Lists the ORF title and position of motif *Outputs a list of counts to stdout. */ import java.io.*; import java.util.*; import java.util.regex.*; import org.biojava.bio.*; import org.biojava.bio.seq.*; import org.biojava.bio.seq.io.*; import org.biojava.bio.symbol.*; public class MotifLister{ public MotifLister(String type, String inputFile, String target, String placement)throws Exception{ System.out.println("MotifLister is searching file " + inputFile + " for the motif '" + target + "' in frame " + placement + "."); try{ if(type.equalsIgnoreCase("dna")){ motif = DNATools.createDNA(target); }else if(type.equalsIgnoreCase("rna")){ motif = RNATools.createRNA(target); }else{ motif = ProteinTools.createProtein(target); } } catch(BioError e){ System.out.println("Error!! 
Data type must match type of motif."); System.out.println("Specifically, " + target + " is not " + type); System.exit(0); } Pattern p = Pattern.compile( MotifTools.createRegex(motif) ); frame = Integer.parseInt(placement); if(frame < 0 || frame > 3){ System.out.println("Only frames 0 through 3 are alloweds"); System.out.println("frame zero searches all frames."); System.exit(0); } count = 0; //read the file //input FileInputStream fis = new FileInputStream(inputFile); InputStreamReader isr = new InputStreamReader(fis); BufferedReader input = new BufferedReader(isr); try{ if(type.equalsIgnoreCase("dna")){ si = SeqIOTools.readFastaDNA(input); }else if(type.equalsIgnoreCase("rna")){ si = SeqIOTools.readFastaRNA(input); }else{ si = SeqIOTools.readFastaProtein(input); } while (si.hasNext()){ Sequence seq = si.nextSequence(); Matcher matcher = p.matcher(seq.seqString()); int start = 0; while(matcher.find(start)) { start = matcher.start(); int end = matcher.end(); int result = (start % 3) + 1; if(result == frame || frame == 0){ System.out.println(seq.getName() + " : " + "[" + (start + 1) + "," + (end) + "]"); count++; } start++; } } input.close(); //close the file System.out.println("Total Hits = " + count); } catch(BioException e){ System.out.println(inputFile + " is not a " + type + " file."); System.out.println(e); } } public static void main(String[] args)throws Exception{ if (args.length < 4) { System.err.println(" Usage: >java -jar MotifLister.jar type fastaFile motif frame" + "\n Ex: >java -jar MotifLister.jar dna eColi.fasta AAAAAAG 3 > output.txt" + "\n would search for A AAA AAG in the third frame in dna file eColi.fasta" + "\n and print the results to file output.txt." + "\n 'type' can be dna, rna, or protein." + "\n 'frame' can be integers 0 through 3." + "\n 0 counts any instance of the motif." + "\n 1, 2, 3 counts only instances of the motif in the specified frame." 
+ "\n Capture output with redirection operator '>'."); }else{ MotifLister ML = new MotifLister(args[0], args[1], args[2], args[3]); } } private SymbolList motif; private int frame; private int count; private SequenceIterator si; } --------------------------------- Do you Yahoo!? Yahoo! Search - Find what you're looking for faster._______________________________________________ Biojava-l mailing list - [EMAIL PROTECTED] http://biojava.org/mailman/listinfo/biojava-l _______________________________________________ Biojava-l mailing list - [EMAIL PROTECTED] http://biojava.org/mailman/listinfo/biojava-l