Revision: 18331
          http://sourceforge.net/p/gate/code/18331
Author:   adamfunk
Date:     2014-09-12 20:35:28 +0000 (Fri, 12 Sep 2014)
Log Message:
-----------
WIP on the file location memory & the empty document issue.

Modified Paths:
--------------
    
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
    
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java
    
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java
    
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java

Modified: 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
===================================================================
--- 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
      2014-09-12 15:45:46 UTC (rev 18330)
+++ 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
      2014-09-12 20:35:28 UTC (rev 18331)
@@ -25,6 +25,7 @@
 import gate.gui.ResourceHelper;
 import gate.util.InvalidOffsetException;
 import java.awt.event.ActionEvent;
+import java.io.IOException;
 import java.io.InputStream;
 import java.net.MalformedURLException;
 import java.net.URL;
@@ -68,10 +69,12 @@
    */
   public static void populateCorpus(final Corpus corpus, URL inputUrl, String 
encoding, List<String> contentKeys,
       List<String> featureKeys, int tweetsPerDoc) throws 
ResourceInstantiationException {
+    
+    InputStream input = null;
     try {
-      InputStream input = inputUrl.openStream();
+      input = inputUrl.openStream();
       
-      // TODO detect & handle gzipped input
+      // TODO Detect & handle gzipped input.
       TweetStreamIterable tweetSource = new TweetStreamIterable(input, 
contentKeys, featureKeys, false);
 
       int tweetCounter = 0;
@@ -79,6 +82,8 @@
       StringBuilder content = new StringBuilder();
       Map<PreAnnotation, Integer> annotandaOffsets = new 
HashMap<PreAnnotation, Integer>();
       
+      // TODO Suppress empty documents (generated by 0-tweet files).
+      
       for (Tweet tweet : tweetSource) {
         if ( (tweetsPerDoc > 0) && (tweetCounter > 0) && ((tweetCounter % 
tweetsPerDoc) == 0) ) {
           closeDocument(document, content, annotandaOffsets, corpus);
@@ -97,6 +102,9 @@
         tweetCounter++;
       } // end of Tweet loop
       
+      System.out.println("CL = " + content.length());
+      System.out.println("TC = " + tweetCounter);
+
       if (content.length() > 0) {
         closeDocument(document, content, annotandaOffsets, corpus);
       }
@@ -112,6 +120,18 @@
     catch (Exception e) {
       throw new ResourceInstantiationException(e);
     }
+    finally {
+      if (input != null) {
+        try {
+          input.close();
+        } 
+        catch(IOException e) {
+          logger.warn("Error in Twitter Population", e);
+        }
+      }
+      
+    }
+    
   }
 
 

Modified: 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java
===================================================================
--- 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java
        2014-09-12 15:45:46 UTC (rev 18330)
+++ 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationConfig.java
        2014-09-12 20:35:28 UTC (rev 18331)
@@ -14,7 +14,6 @@
 
 import gate.Gate;
 import gate.swing.XJFileChooser;
-//TODO Get GATE to remember last location.
 import java.awt.event.ActionEvent;
 import java.awt.event.ActionListener;
 import java.io.File;
@@ -152,6 +151,8 @@
   @Override
   public void actionPerformed(ActionEvent arg0) {
     XJFileChooser chooser = new XJFileChooser();
+    //TODO Get GATE to remember last location.
+    //chooser.setResource(PopulationConfig.class.getName());
     chooser.setDialogTitle("Load XML configuration");
     chooser.setFileSelectionMode(XJFileChooser.FILES_ONLY);
     int chosen = chooser.showOpenDialog(this.wrapper.dialog);

Modified: 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java
===================================================================
--- 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java
 2014-09-12 15:45:46 UTC (rev 18330)
+++ 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/PopulationDialogWrapper.java
 2014-09-12 20:35:28 UTC (rev 18331)
@@ -14,7 +14,6 @@
 import gate.gui.ListEditorDialog;
 import gate.gui.MainFrame;
 import gate.swing.XJFileChooser;
-// TODO Get GATE to remember last location.
 import gate.util.ExtensionFileFilter;
 import gate.util.Strings;
 import java.awt.Window;
@@ -110,6 +109,8 @@
     dialog.add(Box.createVerticalStrut(2));
     
     chooser = new XJFileChooser();
+    // TODO Fix this to get GATE to remember last location.
+    //chooser.setResource(PopulationDialogWrapper.class.getName());
     chooser.setFileSelectionMode(XJFileChooser.FILES_ONLY);
     chooser.setMultiSelectionEnabled(true);
     chooser.setDialogTitle("Select a Twitter JSON file");

Modified: 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
===================================================================
--- 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
     2014-09-12 15:45:46 UTC (rev 18330)
+++ 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
     2014-09-12 20:35:28 UTC (rev 18331)
@@ -75,11 +75,13 @@
   
   @Override
   public boolean hasNext() {
+    // Should that be iterator.hasNextValue() ?
     return this.iterator.hasNext() || 
-            ( (this.nestedStatuses != null) && this.nestedStatuses.hasNext());
-    // should that be iterator.hasNextValue() ?
+            (this.nested && (this.nestedStatuses != null) && 
this.nestedStatuses.hasNext());
+    // Belt & braces: this.nested should suffice.
   }
 
+  
   @Override
   public Tweet next() {
     Tweet result = null;
@@ -98,12 +100,12 @@
         if (isSearchResultList(json)) {
           this.nestedStatuses = getStatuses(json).iterator();
           this.nested = this.nestedStatuses.hasNext();
-          // Set the nested flag according as there is anything in 
-          // the statuses value array (it could be empty).
+          // Set the nested flag according as there is anything left
+          // in the statuses value array (which could be empty).
         }
         
-        // Test nested now: true IFF we are in a search result thingy AND
-        // the statuses array is non-empty.
+        // Now let's test nested: true IFF we are in a search result thingy AND
+        // the thingy's statuses array is non-empty.
         if (this.nested) {
           result = Tweet.readTweet(this.nestedStatuses.next(), contentKeys, 
featureKeys);
           // Set the nested flag again for the next call to next()

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Want excitement?
Manually upgrade your production database.
When you want reliability, choose Perforce
Perforce version control. Predictably reliable.
http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to