Revision: 16219
          http://gate.svn.sourceforge.net/gate/?rev=16219&view=rev
Author:   markagreenwood
Date:     2012-11-02 07:41:13 +0000 (Fri, 02 Nov 2012)
Log Message:
-----------
Switched over to calling the new method directly, and updated the GUI to show a
drop-down of known MIME types -- the list needs updating when the set of MIME
types changes.

Modified Paths:
--------------
    gate/trunk/src/gate/DocumentFormat.java
    gate/trunk/src/gate/SimpleCorpus.java
    gate/trunk/src/gate/corpora/CorpusImpl.java
    gate/trunk/src/gate/corpora/DocType.java
    gate/trunk/src/gate/corpora/SerialCorpusImpl.java
    gate/trunk/src/gate/gui/NameBearerHandle.java
    gate/trunk/src/gate/gui/SingleConcatenatedFileInputDialog.java

Modified: gate/trunk/src/gate/DocumentFormat.java
===================================================================
--- gate/trunk/src/gate/DocumentFormat.java     2012-11-02 02:21:08 UTC (rev 16218)
+++ gate/trunk/src/gate/DocumentFormat.java     2012-11-02 07:41:13 UTC (rev 16219)
@@ -163,6 +163,10 @@
     if(fileSufix == null) return null;
     return  suffixes2mimeTypeMap.get(fileSufix.toLowerCase());
   }//getMimeType
+  
+  public static Set<String> getSupportedMimeTypes() {
+    return Collections.unmodifiableSet(mimeString2mimeTypeMap.keySet());
+  }
 
   /**
     * Returns a MymeType having as input a URL object. If the MimeType wasn't

Modified: gate/trunk/src/gate/SimpleCorpus.java
===================================================================
--- gate/trunk/src/gate/SimpleCorpus.java       2012-11-02 02:21:08 UTC (rev 16218)
+++ gate/trunk/src/gate/SimpleCorpus.java       2012-11-02 07:41:13 UTC (rev 16219)
@@ -16,7 +16,6 @@
 
 package gate;
 
-import gate.corpora.DocType;
 import gate.creole.ResourceInstantiationException;
 import gate.util.NameBearer;
 
@@ -128,9 +127,15 @@
    * @return total length of populated documents in the corpus in number
    *         of bytes
    */
+  @Deprecated
   public long populate(URL singleConcatenatedFile, String documentRootElement,
           String encoding, int numberOfDocumentsToExtract,
-          String documentNamePrefix, DocType documentType) throws IOException,
+          String documentNamePrefix, gate.corpora.DocType documentType) throws IOException,
           ResourceInstantiationException;
+  
+  public long populate(URL singleConcatenatedFile, String documentRootElement,
+      String encoding, int numberOfDocumentsToExtract,
+      String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException,
+      ResourceInstantiationException;
 
 } // interface SimpleCorpus

Modified: gate/trunk/src/gate/corpora/CorpusImpl.java
===================================================================
--- gate/trunk/src/gate/corpora/CorpusImpl.java 2012-11-02 02:21:08 UTC (rev 16218)
+++ gate/trunk/src/gate/corpora/CorpusImpl.java 2012-11-02 07:41:13 UTC (rev 16219)
@@ -511,9 +511,8 @@
    * @return total length of populated documents in the corpus in number
    *         of bytes
    * @throws java.io.IOException
-   * @deprecated
    */
-  @SuppressWarnings("deprecation")
+  @Deprecated
   public static long populate(Corpus corpus, URL singleConcatenatedFile,
       String documentRootElement, String encoding,
       int numberOfDocumentsToExtract, String documentNamePrefix,
@@ -532,7 +531,7 @@
       String documentRootElement, String encoding,
       int numberOfDocumentsToExtract, String documentNamePrefix,
       String mimeType, boolean includeRootElement) throws IOException { 
-
+    
     StatusListener sListener = (StatusListener)gate.Gate.getListeners().get("gate.event.StatusListener");
     
     // obtain the root element that user has provided
@@ -577,8 +576,6 @@
       // continue until reached the end of file
       while(line != null) {
 
-       
-
         // lowercase the line in order to match documentRootElement in any case
         String lowerCasedLine = line.toLowerCase();
 
@@ -623,7 +620,7 @@
             searchingForStartElement = true;
 
             // here lets create a new document create the doc
-            if(sListener != null) sListener.statusChanged("Creating File Number :" + count);
+            if(sListener != null) sListener.statusChanged("Creating Document Number :" + count);
             
             String docName = documentNamePrefix + count + "_" + Gate.genSym();
             
@@ -648,8 +645,7 @@
               }
               
               // already extracted requested num of documents?
-              if(numberOfDocumentsToExtract != -1
-                      && (count - 1) == numberOfDocumentsToExtract) break;
+              if((count - 1) == numberOfDocumentsToExtract) break;
             }
             catch(Throwable t) {
               String nl = Strings.getNl();
@@ -663,8 +659,9 @@
             if(sListener != null) sListener.statusChanged(docName + " created!");
 
             //TODO where do the 7 and 6 come from!
-            if(line.length() > index + 7)
+            if(line.length() > index + 7) {
               line = line.substring(index + 6);
+            }
             else line = br.readLine();
           }
         }
@@ -692,7 +689,7 @@
    * @return total length of populated documents in the corpus in number
    *         of bytes
    */
-  @SuppressWarnings("deprecation")
+  @Deprecated
   public long populate(URL singleConcatenatedFile, String documentRootElement,
           String encoding, int numberOfFilesToExtract,
           String documentNamePrefix, DocType documentType) throws IOException,
@@ -700,6 +697,15 @@
     return populate(this, singleConcatenatedFile, documentRootElement,
             encoding, numberOfFilesToExtract, documentNamePrefix, documentType);
   }
+  
+  public long populate(URL singleConcatenatedFile, String documentRootElement,
+      String encoding, int numberOfFilesToExtract,
+      String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException,
+      ResourceInstantiationException {
+    return CorpusImpl.populate(this, singleConcatenatedFile,
+        documentRootElement, encoding, numberOfFilesToExtract,
+        documentNamePrefix, mimeType, includeRootElement);
+}
 
   public synchronized void removeCorpusListener(CorpusListener l) {
     if(corpusListeners != null && corpusListeners.contains(l)) {

Modified: gate/trunk/src/gate/corpora/DocType.java
===================================================================
--- gate/trunk/src/gate/corpora/DocType.java    2012-11-02 02:21:08 UTC (rev 16218)
+++ gate/trunk/src/gate/corpora/DocType.java    2012-11-02 07:41:13 UTC (rev 16219)
@@ -16,8 +16,8 @@
 /**
  * Enum for different types of documents.
  * @author niraj
- * @deprecated
  */
+@Deprecated
 public enum DocType {
   HTML, XML, OTHER;
 }

Modified: gate/trunk/src/gate/corpora/SerialCorpusImpl.java
===================================================================
--- gate/trunk/src/gate/corpora/SerialCorpusImpl.java   2012-11-02 02:21:08 UTC (rev 16218)
+++ gate/trunk/src/gate/corpora/SerialCorpusImpl.java   2012-11-02 07:41:13 UTC (rev 16219)
@@ -403,7 +403,7 @@
    * @return total length of populated documents in the corpus in number
    *         of bytes
    */
-  @SuppressWarnings("deprecation")
+  @Deprecated
   public long populate(URL singleConcatenatedFile, String documentRootElement,
           String encoding, int numberOfFilesToExtract,
           String documentNamePrefix, DocType documentType) throws IOException,
@@ -412,6 +412,15 @@
             documentRootElement, encoding, numberOfFilesToExtract,
             documentNamePrefix, documentType);
   }
+  
+  public long populate(URL singleConcatenatedFile, String documentRootElement,
+          String encoding, int numberOfFilesToExtract,
+          String documentNamePrefix, String mimeType, boolean includeRootElement) throws IOException,
+          ResourceInstantiationException {
+    return CorpusImpl.populate(this, singleConcatenatedFile,
+            documentRootElement, encoding, numberOfFilesToExtract,
+            documentNamePrefix, mimeType, includeRootElement);
+  }
 
   public synchronized void removeCorpusListener(CorpusListener l) {
     if(corpusListeners != null && corpusListeners.contains(l)) {

Modified: gate/trunk/src/gate/gui/NameBearerHandle.java
===================================================================
--- gate/trunk/src/gate/gui/NameBearerHandle.java       2012-11-02 02:21:08 UTC (rev 16218)
+++ gate/trunk/src/gate/gui/NameBearerHandle.java       2012-11-02 07:41:13 UTC (rev 16219)
@@ -1589,7 +1589,7 @@
                       .getDocumentRootElement(), scfInputDialog.getEncoding(),
                       scfInputDialog.getNumOfDocumentsToFetch(), scfInputDialog
                               .getDocumentNamePrefix(), scfInputDialog
-                              .getDocumentType());
+                              .getDocumentMimeType(), true);
               if(((Corpus)target).getDataStore() != null) {
                 ((LanguageResource)target).getDataStore().sync(
                         (LanguageResource)target);

Modified: gate/trunk/src/gate/gui/SingleConcatenatedFileInputDialog.java
===================================================================
--- gate/trunk/src/gate/gui/SingleConcatenatedFileInputDialog.java      2012-11-02 02:21:08 UTC (rev 16218)
+++ gate/trunk/src/gate/gui/SingleConcatenatedFileInputDialog.java      2012-11-02 07:41:13 UTC (rev 16219)
@@ -13,8 +13,8 @@
  */
 package gate.gui;
 
+import gate.DocumentFormat;
 import gate.Gate;
-import gate.corpora.DocType;
 
 import java.awt.GridBagConstraints;
 import java.awt.GridBagLayout;
@@ -22,6 +22,7 @@
 import java.awt.event.ActionEvent;
 import java.awt.event.ActionListener;
 import java.io.IOException;
+import java.util.Arrays;
 
 import javax.swing.JButton;
 import javax.swing.JComboBox;
@@ -36,8 +37,6 @@
  * A simple component that allows the user to select a trec web file and
  * encoding
  */
-
-@SuppressWarnings("deprecation")
 public class SingleConcatenatedFileInputDialog extends JPanel {
 
   public SingleConcatenatedFileInputDialog() {
@@ -117,15 +116,18 @@
     constraints.anchor = GridBagConstraints.WEST;
     constraints.fill = GridBagConstraints.NONE;
     constraints.insets = new Insets(0, 0, 0, 5);
-    add(new JLabel("Document type:"), constraints);
+    add(new JLabel("Document Mime Type:"), constraints);
 
     constraints = new GridBagConstraints();
     constraints.gridx = GridBagConstraints.RELATIVE;
     constraints.gridy = 3;
     constraints.gridwidth = 4;
     constraints.fill = GridBagConstraints.HORIZONTAL;
-    documentTypeComboBox = new JComboBox(DocType.values());
+    Object[] mimeTypes = DocumentFormat.getSupportedMimeTypes().toArray();
+    Arrays.sort(mimeTypes);
+    documentTypeComboBox = new JComboBox(mimeTypes);
     documentTypeComboBox.setEditable(false);
+    documentTypeComboBox.setSelectedItem("text/html");
     add(documentTypeComboBox, constraints);
 
     // fifth row
@@ -250,15 +252,15 @@
   /**
    * Gets the selected document type.
    */
-  public DocType getDocumentType() {
-    return (DocType)this.documentTypeComboBox.getSelectedItem();
+  public String getDocumentMimeType() {
+    return (String)this.documentTypeComboBox.getSelectedItem();
   }
 
   /**
    * Sets the document type
    */
-  public void setDocumentType(DocType documentType) {
-    this.documentTypeComboBox.setSelectedItem(documentType);
+  public void setDocumentMimeType(String mimeType) {
+    this.documentTypeComboBox.setSelectedItem(mimeType);
   }
 
   /**

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
LogMeIn Central: Instant, anywhere, Remote PC access and management.
Stay in control, update software, and manage PCs from one command center
Diagnose problems and improve visibility into emerging IT issues
Automate, monitor and manage. Do more in less time with Central
http://p.sf.net/sfu/logmein12331_d2d
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to