Sorry for the wrong reply...
Here is the FULL code; I marked the important passages in red:
Thanks for looking at it!!!!
Bernd
package org.pasteur.pf2.biojava;
import java.io.BufferedReader;
import java.io.File;
import java.io.FileReader;
import java.io.IOException;
import java.util.Iterator;
import java.util.NoSuchElementException;
import org.biojava.bio.BioException;
import org.biojava.bio.seq.Sequence;
import org.biojava.bio.seq.SequenceIterator;
import org.biojava.bio.seq.io.SeqIOTools;
import org.biojava.bio.seq.io.SymbolTokenization;
import org.biojava.bio.symbol.Alphabet;
import org.biojava.bio.symbol.AlphabetManager;
import org.biojava.bio.symbol.SymbolList;
import org.biojavax.RichObjectFactory;
import org.biojavax.bio.seq.io.RichSequenceFormat;
import org.knime.core.data.DataCell;
import org.knime.core.data.DataColumnSpec;
import org.knime.core.data.DataColumnSpecCreator;
import org.knime.core.data.DataTableSpec;
import org.knime.core.data.RowKey;
import org.knime.core.data.container.BlobDataCell;
import org.knime.core.data.def.DefaultRow;
import org.knime.core.data.def.StringCell;
import org.knime.core.node.BufferedDataContainer;
import org.knime.core.node.BufferedDataTable;
import org.knime.core.node.CanceledExecutionException;
import org.knime.core.node.ExecutionContext;
import org.knime.core.node.ExecutionMonitor;
import org.knime.core.node.InvalidSettingsException;
import org.knime.core.node.NodeLogger;
import org.knime.core.node.NodeModel;
import org.knime.core.node.NodeSettingsRO;
import org.knime.core.node.NodeSettingsWO;
import org.knime.core.node.defaultnodesettings.SettingsModelString;
import org.biojavax.bio.seq.io.EMBLFormat;
import org.biojavax.bio.seq.io.FastaFormat;
import org.biojavax.bio.seq.io.GenbankFormat;
import org.biojavax.bio.seq.io.INSDseqFormat;
import org.biojavax.bio.seq.io.RichSequenceBuilderFactory;
import org.biojavax.bio.seq.io.RichSequenceFormat;
import org.biojavax.bio.seq.io.RichStreamReader;
import org.biojavax.bio.seq.io.UniProtFormat;
import org.pasteur.pf2.datatypes.*;
/**
* This is the model implementation of FastAReader. Reads a FASTA file
into two
* columns: seq_name and sequence
*
* @author Bernd Jagla
*/
@SuppressWarnings("deprecation")
public class FastAReaderNodeModel extends NodeModel {
// the logger instance
private static final NodeLogger logger = NodeLogger
.getLogger(FastQReaderNodeModel.class);
private Alphabet alpha;
private SequenceIterator iter;
/**
* the settings key which is used to retrieve and store the
settings (from
* the dialog or from a settings file) (package visibility to be
usable from
* the dialog).
*/
private static final String FAR_name = "far_name";
private static final String FAR_fileFormat = "far_ff";
private static final String FAR_alphabet = "far_alph";
private final SettingsModelString m_fpname = createFAR_fpname();
private final SettingsModelString m_fformat = createFileFormat();
private final SettingsModelString m_alphabet = createAlphabet();
/**
* Constructor for the node model.
*/
protected FastAReaderNodeModel() {
super(0, 1);
}
/**
* {...@inheritdoc}
*/
@Override
protected BufferedDataTable[] execute(final BufferedDataTable[] inData,
final ExecutionContext exec) throws Exception {
// TODO do something here
logger.info("Node Model Stub... this is not yet implemented !");
// the data table spec of the single output table,
// the table will have three columns:
DataColumnSpec[] allColSpecs = new DataColumnSpec[1];
allColSpecs[0] = new DataColumnSpecCreator("sequence",
SequenceDataCell.TYPE)
.createSpec();
DataTableSpec outputSpec = new DataTableSpec(allColSpecs);
// the execution context will provide us with storage capacity,
in this
// case a data container to which we will add rows sequentially
// Note, this container can also handle arbitrary big data
tables, it
// will buffer to disc if necessary.
BufferedDataContainer container =
exec.createDataContainer(outputSpec);
// let's add m_count rows to it
// once we are done, we close the container and return its table
FileReader fp = new FileReader(m_fpname.getStringValue());
exec.checkCanceled();
//String form = m_fformat.getStringValue();
//String alphabet = m_alphabet.getStringValue();
String form = "genbank";
String alphabet = "DNA";
BufferedReader br = new BufferedReader(fp);
// String line = br.readLine();
int count = 0;
SequenceIterator iter = (SequenceIterator)
SeqIOTools.fileToBiojava(
form, alphabet, br);
while (iter.hasNext()) {
exec.checkCanceled();
RowKey key = new RowKey("Row " + count);
exec.setProgress("Row " + count);
// System.out.println(fastq.getSequence());
Sequence seq = iter.nextSequence();
String seqName = seq.getName();
// String seqName = "asdf";
//String sequence = seq.seqString();
System.err.println("reading: " + seqName + " " + seq.length());
SequenceDataCell seqCell = new SequenceDataCell(seqName, seq);
container.addRowToTable(new DefaultRow(key, seqCell));
count++;
}
System.err.println("finished reading file");
br.close();
fp.close();
container.close();
return new BufferedDataTable[] { container.getTable() };
}
/**
* Makes a <code>SequenceIterator</code> look like an
* <code>Iterator {...@code <Sequence>}</code>
*
* @param iter
* The <CODE>SequenceIterator</CODE>
* @return An <CODE>Iterator</CODE> that returns only
<CODE>Sequence</CODE>
* objects. <B>You cannot call <code>remove()</code> on this
* iterator!</B>
*/
public Iterator<Sequence> asIterator(SequenceIterator iter) {
final SequenceIterator it = iter;
return new Iterator<Sequence>() {
public boolean hasNext() {
return it.hasNext();
}
public Sequence next() {
try {
return it.nextSequence();
} catch (BioException e) {
NoSuchElementException ex = new
NoSuchElementException();
ex.initCause(e);
throw ex;
}
}
public void remove() {
throw new UnsupportedOperationException();
}
};
}
public static RichSequenceFormat formatForName(String name)
throws ClassNotFoundException, InstantiationException,
IllegalAccessException {
// determine the format to use
RichSequenceFormat format;
if (name.equalsIgnoreCase("fasta")) {
format = (RichSequenceFormat) new FastaFormat();
} else if (name.equalsIgnoreCase("genbank")) {
format = (RichSequenceFormat) new GenbankFormat();
} else if (name.equalsIgnoreCase("uniprot")) {
format = new UniProtFormat();
} else if (name.equalsIgnoreCase("embl")) {
format = new EMBLFormat();
} else if (name.equalsIgnoreCase("INSDseq")) {
format = new INSDseqFormat();
} else {
Class formatClass = Class.forName(name);
format = (RichSequenceFormat) formatClass.newInstance();
}
return format;
}
/**
* {...@inheritdoc}
*/
@Override
protected void reset() {
}
/**
* {...@inheritdoc}
*/
@Override
protected DataTableSpec[] configure(final DataTableSpec[] inSpecs)
throws InvalidSettingsException {
DataColumnSpec[] allColSpecs = new DataColumnSpec[1];
allColSpecs[0] = new DataColumnSpecCreator("sequence",
SequenceDataCell.TYPE)
.createSpec();
DataTableSpec outputSpec = new DataTableSpec(allColSpecs);
return new DataTableSpec[] { outputSpec };
}
/**
* {...@inheritdoc}
*/
@Override
protected void saveSettingsTo(final NodeSettingsWO settings) {
m_alphabet.saveSettingsTo(settings);
m_fformat.saveSettingsTo(settings);
m_fpname.saveSettingsTo(settings);
}
/**
* {...@inheritdoc}
*/
@Override
protected void loadValidatedSettingsFrom(final NodeSettingsRO settings)
throws InvalidSettingsException {
m_alphabet.loadSettingsFrom(settings);
m_fformat.loadSettingsFrom(settings);
m_fpname.loadSettingsFrom(settings);
}
/**
* {...@inheritdoc}
*/
@Override
protected void validateSettings(final NodeSettingsRO settings)
throws InvalidSettingsException {
m_alphabet.validateSettings(settings);
m_fformat.validateSettings(settings);
m_fpname.validateSettings(settings);
}
/**
* {...@inheritdoc}
*/
@Override
protected void loadInternals(final File internDir,
final ExecutionMonitor exec) throws IOException,
CanceledExecutionException {
}
/**
* {...@inheritdoc}
*/
@Override
protected void saveInternals(final File internDir,
final ExecutionMonitor exec) throws IOException,
CanceledExecutionException {
}
public static SettingsModelString createFAR_fpname() {
return new SettingsModelString(FAR_name, "");
}
public static SettingsModelString createFileFormat() {
return new SettingsModelString(FAR_fileFormat, "FASTA");
}
public static SettingsModelString createAlphabet() {
return new SettingsModelString(FAR_alphabet, "RNA");
}
}
On 9/21/2010 2:40 PM, simon rayner wrote:
hi,
can you repost to the biojava group along with the full code, (just in
case there is a missing import or something). you only replied to me,
and not to the biojava mailing list
thanks
simon
On Tue, Sep 21, 2010 at 8:18 PM, Bernd Jagla <[email protected]
<mailto:[email protected]>> wrote:
Thanks for the quick reply!
Here is some code that should have all the important parts:
String form = "genbank";
String alphabet = "dna";
BufferedReader br = new BufferedReader(fp);
SequenceIterator iter = (SequenceIterator) SeqIOTools.fileToBiojava(
form, alphabet, br);
while (iter.hasNext()) {
Sequence seq = iter.nextSequence();
=> Exception thrown
String seqName = seq.getName();
}
When trying to simplify the code a bit I now get the following error:
Execute failed: Could not initialize class
org.biojava.bio.seq.FeatureFilter
I assume that in the previous times I had a spelling error??
Then the exception got thrown during the initialization of "iter"
Thanks,
Bernd
On 9/21/2010 2:07 PM, simon rayner wrote:
hi,
can you post the code you are trying to run along with the full
error, it will help to figure out what is happening. There are
now loaders for biojavax as well, which work well and are
described in the biojavax docs here
http://biojava.org/wiki/BioJava:BioJavaXDocs#Example
but yeah, it's confusing unless you happen to be a real java
guru. i keep having to refer back to the docs because i keep
forgetting which class does what
On Tue, Sep 21, 2010 at 7:46 PM, Bernd Jagla
<[email protected] <mailto:[email protected]>> wrote:
Hello,
I am getting a little frustrated with the wiki page (I guess
I don't spend enough time reading and testing). I have the
impression that some of the documentation relates to version
3 whereas others relate to 1.5 or 1.7.
So sorry if this all sounds a bit confused... ;(
I believe I am using 1.7.1. (I wasn't able to find a readme
file that contains that information) even though I would
probably like to use version 3. But as I am stuck with an
older Eclipse version I think it will be even worse when I
try that.
Anyways, I am trying to read in sequence files using
SeqIOTools.fileToBiojava, which seems to be deprecated, with
the following parameters: "genbank", "dna", bufferedReader.
somehow this works with "fasta" but with genbank I get the
following exception:
Execute failed: Unknown file type '524300'
in some cases I get:
Unknown file type '262156'
Does this mean anything to you?
Or how do you read in a sequence file? I am looking for a
generic way that covers many file types (genbank, fasta,
swissprot...)
Once I have this I will probably be able to get to the
feature information using the information from the tutorial.
Thanks for your time.
Bernd
_______________________________________________
Biojava-l mailing list - [email protected]
<mailto:[email protected]>
http://lists.open-bio.org/mailman/listinfo/biojava-l
--
Simon Rayner
State Key Laboratory of Virology
Wuhan Institute of Virology
Chinese Academy of Sciences
Wuhan, Hubei 430071
P.R.China
+86 (27) 87199895 (office)
+86 18627113001 (cell)
--
Simon Rayner
State Key Laboratory of Virology
Wuhan Institute of Virology
Chinese Academy of Sciences
Wuhan, Hubei 430071
P.R.China
+86 (27) 87199895 (office)
+86 18627113001 (cell)
_______________________________________________
Biojava-l mailing list - [email protected]
http://lists.open-bio.org/mailman/listinfo/biojava-l