Revision: 16219 http://gate.svn.sourceforge.net/gate/?rev=16219&view=rev Author: markagreenwood Date: 2012-11-02 07:41:13 +0000 (Fri, 02 Nov 2012) Log Message: ----------- Switched over to calling the new method directly, and updated the GUI to show a drop-down of known MIME types -- the drop-down needs updating when the set of MIME types changes
Modified Paths: -------------- gate/trunk/src/gate/DocumentFormat.java gate/trunk/src/gate/SimpleCorpus.java gate/trunk/src/gate/corpora/CorpusImpl.java gate/trunk/src/gate/corpora/DocType.java gate/trunk/src/gate/corpora/SerialCorpusImpl.java gate/trunk/src/gate/gui/NameBearerHandle.java gate/trunk/src/gate/gui/SingleConcatenatedFileInputDialog.java Modified: gate/trunk/src/gate/DocumentFormat.java =================================================================== --- gate/trunk/src/gate/DocumentFormat.java 2012-11-02 02:21:08 UTC (rev 16218) +++ gate/trunk/src/gate/DocumentFormat.java 2012-11-02 07:41:13 UTC (rev 16219) @@ -163,6 +163,10 @@ if(fileSufix == null) return null; return suffixes2mimeTypeMap.get(fileSufix.toLowerCase()); }//getMimeType + + public static Set<String> getSupportedMimeTypes() { + return Collections.unmodifiableSet(mimeString2mimeTypeMap.keySet()); + } /** * Returns a MymeType having as input a URL object. If the MimeType wasn't Modified: gate/trunk/src/gate/SimpleCorpus.java =================================================================== --- gate/trunk/src/gate/SimpleCorpus.java 2012-11-02 02:21:08 UTC (rev 16218) +++ gate/trunk/src/gate/SimpleCorpus.java 2012-11-02 07:41:13 UTC (rev 16219) @@ -16,7 +16,6 @@ package gate; -import gate.corpora.DocType; import gate.creole.ResourceInstantiationException; import gate.util.NameBearer; @@ -128,9 +127,15 @@ * @return total length of populated documents in the corpus in number * of bytes */ + @Deprecated public long populate(URL singleConcatenatedFile, String documentRootElement, String encoding, int numberOfDocumentsToExtract, - String documentNamePrefix, DocType documentType) throws IOException, + String documentNamePrefix, gate.corpora.DocType documentType) throws IOException, ResourceInstantiationException; + + public long populate(URL singleConcatenatedFile, String documentRootElement, + String encoding, int numberOfDocumentsToExtract, + String documentNamePrefix, String mimeType, 
boolean includeRootElement) throws IOException, + ResourceInstantiationException; } // interface SimpleCorpus Modified: gate/trunk/src/gate/corpora/CorpusImpl.java =================================================================== --- gate/trunk/src/gate/corpora/CorpusImpl.java 2012-11-02 02:21:08 UTC (rev 16218) +++ gate/trunk/src/gate/corpora/CorpusImpl.java 2012-11-02 07:41:13 UTC (rev 16219) @@ -511,9 +511,8 @@ * @return total length of populated documents in the corpus in number * of bytes * @throws java.io.IOException - * @deprecated */ - @SuppressWarnings("deprecation") + @Deprecated public static long populate(Corpus corpus, URL singleConcatenatedFile, String documentRootElement, String encoding, int numberOfDocumentsToExtract, String documentNamePrefix, @@ -532,7 +531,7 @@ String documentRootElement, String encoding, int numberOfDocumentsToExtract, String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException { - + StatusListener sListener = (StatusListener)gate.Gate.getListeners().get("gate.event.StatusListener"); // obtain the root element that user has provided @@ -577,8 +576,6 @@ // continue until reached the end of file while(line != null) { - - // lowercase the line in order to match documentRootElement in any case String lowerCasedLine = line.toLowerCase(); @@ -623,7 +620,7 @@ searchingForStartElement = true; // here lets create a new document create the doc - if(sListener != null) sListener.statusChanged("Creating File Number :" + count); + if(sListener != null) sListener.statusChanged("Creating Document Number :" + count); String docName = documentNamePrefix + count + "_" + Gate.genSym(); @@ -648,8 +645,7 @@ } // already extracted requested num of documents? 
- if(numberOfDocumentsToExtract != -1 - && (count - 1) == numberOfDocumentsToExtract) break; + if((count - 1) == numberOfDocumentsToExtract) break; } catch(Throwable t) { String nl = Strings.getNl(); @@ -663,8 +659,9 @@ if(sListener != null) sListener.statusChanged(docName + " created!"); //TODO where do the 7 and 6 come from! - if(line.length() > index + 7) + if(line.length() > index + 7) { line = line.substring(index + 6); + } else line = br.readLine(); } } @@ -692,7 +689,7 @@ * @return total length of populated documents in the corpus in number * of bytes */ - @SuppressWarnings("deprecation") + @Deprecated public long populate(URL singleConcatenatedFile, String documentRootElement, String encoding, int numberOfFilesToExtract, String documentNamePrefix, DocType documentType) throws IOException, @@ -700,6 +697,15 @@ return populate(this, singleConcatenatedFile, documentRootElement, encoding, numberOfFilesToExtract, documentNamePrefix, documentType); } + + public long populate(URL singleConcatenatedFile, String documentRootElement, + String encoding, int numberOfFilesToExtract, + String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException, + ResourceInstantiationException { + return CorpusImpl.populate(this, singleConcatenatedFile, + documentRootElement, encoding, numberOfFilesToExtract, + documentNamePrefix, mimeType, includeRootElement); +} public synchronized void removeCorpusListener(CorpusListener l) { if(corpusListeners != null && corpusListeners.contains(l)) { Modified: gate/trunk/src/gate/corpora/DocType.java =================================================================== --- gate/trunk/src/gate/corpora/DocType.java 2012-11-02 02:21:08 UTC (rev 16218) +++ gate/trunk/src/gate/corpora/DocType.java 2012-11-02 07:41:13 UTC (rev 16219) @@ -16,8 +16,8 @@ /** * Enum for different types of documents. 
* @author niraj - * @deprecated */ +@Deprecated public enum DocType { HTML, XML, OTHER; } Modified: gate/trunk/src/gate/corpora/SerialCorpusImpl.java =================================================================== --- gate/trunk/src/gate/corpora/SerialCorpusImpl.java 2012-11-02 02:21:08 UTC (rev 16218) +++ gate/trunk/src/gate/corpora/SerialCorpusImpl.java 2012-11-02 07:41:13 UTC (rev 16219) @@ -403,7 +403,7 @@ * @return total length of populated documents in the corpus in number * of bytes */ - @SuppressWarnings("deprecation") + @Deprecated public long populate(URL singleConcatenatedFile, String documentRootElement, String encoding, int numberOfFilesToExtract, String documentNamePrefix, DocType documentType) throws IOException, @@ -412,6 +412,15 @@ documentRootElement, encoding, numberOfFilesToExtract, documentNamePrefix, documentType); } + + public long populate(URL singleConcatenatedFile, String documentRootElement, + String encoding, int numberOfFilesToExtract, + String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException, + ResourceInstantiationException { + return CorpusImpl.populate(this, singleConcatenatedFile, + documentRootElement, encoding, numberOfFilesToExtract, + documentNamePrefix, mimeType, includeRootElement); + } public synchronized void removeCorpusListener(CorpusListener l) { if(corpusListeners != null && corpusListeners.contains(l)) { Modified: gate/trunk/src/gate/gui/NameBearerHandle.java =================================================================== --- gate/trunk/src/gate/gui/NameBearerHandle.java 2012-11-02 02:21:08 UTC (rev 16218) +++ gate/trunk/src/gate/gui/NameBearerHandle.java 2012-11-02 07:41:13 UTC (rev 16219) @@ -1589,7 +1589,7 @@ .getDocumentRootElement(), scfInputDialog.getEncoding(), scfInputDialog.getNumOfDocumentsToFetch(), scfInputDialog .getDocumentNamePrefix(), scfInputDialog - .getDocumentType()); + .getDocumentMimeType(), true); if(((Corpus)target).getDataStore() != null) { 
((LanguageResource)target).getDataStore().sync( (LanguageResource)target); Modified: gate/trunk/src/gate/gui/SingleConcatenatedFileInputDialog.java =================================================================== --- gate/trunk/src/gate/gui/SingleConcatenatedFileInputDialog.java 2012-11-02 02:21:08 UTC (rev 16218) +++ gate/trunk/src/gate/gui/SingleConcatenatedFileInputDialog.java 2012-11-02 07:41:13 UTC (rev 16219) @@ -13,8 +13,8 @@ */ package gate.gui; +import gate.DocumentFormat; import gate.Gate; -import gate.corpora.DocType; import java.awt.GridBagConstraints; import java.awt.GridBagLayout; @@ -22,6 +22,7 @@ import java.awt.event.ActionEvent; import java.awt.event.ActionListener; import java.io.IOException; +import java.util.Arrays; import javax.swing.JButton; import javax.swing.JComboBox; @@ -36,8 +37,6 @@ * A simple component that allows the user to select a trec web file and * encoding */ - -@SuppressWarnings("deprecation") public class SingleConcatenatedFileInputDialog extends JPanel { public SingleConcatenatedFileInputDialog() { @@ -117,15 +116,18 @@ constraints.anchor = GridBagConstraints.WEST; constraints.fill = GridBagConstraints.NONE; constraints.insets = new Insets(0, 0, 0, 5); - add(new JLabel("Document type:"), constraints); + add(new JLabel("Document Mime Type:"), constraints); constraints = new GridBagConstraints(); constraints.gridx = GridBagConstraints.RELATIVE; constraints.gridy = 3; constraints.gridwidth = 4; constraints.fill = GridBagConstraints.HORIZONTAL; - documentTypeComboBox = new JComboBox(DocType.values()); + Object[] mimeTypes = DocumentFormat.getSupportedMimeTypes().toArray(); + Arrays.sort(mimeTypes); + documentTypeComboBox = new JComboBox(mimeTypes); documentTypeComboBox.setEditable(false); + documentTypeComboBox.setSelectedItem("text/html"); add(documentTypeComboBox, constraints); // fifth row @@ -250,15 +252,15 @@ /** * Gets the selected document type. 
*/ - public DocType getDocumentType() { - return (DocType)this.documentTypeComboBox.getSelectedItem(); + public String getDocumentMimeType() { + return (String)this.documentTypeComboBox.getSelectedItem(); } /** * Sets the document type */ - public void setDocumentType(DocType documentType) { - this.documentTypeComboBox.setSelectedItem(documentType); + public void setDocumentMimeType(String mimeType) { + this.documentTypeComboBox.setSelectedItem(mimeType); } /** This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ LogMeIn Central: Instant, anywhere, Remote PC access and management. Stay in control, update software, and manage PCs from one command center Diagnose problems and improve visibility into emerging IT issues Automate, monitor and manage. Do more in less time with Central http://p.sf.net/sfu/logmein12331_d2d _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs