Revision: 18335
          http://sourceforge.net/p/gate/code/18335
Author:   adamfunk
Date:     2014-09-15 09:25:03 +0000 (Mon, 15 Sep 2014)
Log Message:
-----------
Fixed the hasNext functionality so we can detect & delete empty doc if 0 tweets 
in statuses array.

Modified Paths:
--------------
    
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
    
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java

Modified: gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java
===================================================================
--- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java	2014-09-15 01:20:33 UTC (rev 18334)
+++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java	2014-09-15 09:25:03 UTC (rev 18335)
@@ -102,9 +102,6 @@
         tweetCounter++;
       } // end of Tweet loop
       
-      System.out.println("CL = " + content.length());
-      System.out.println("TC = " + tweetCounter);
-
       if (content.length() > 0) {
         closeDocument(document, content, annotandaOffsets, corpus);
       }

Modified: gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
===================================================================
--- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java	2014-09-15 01:20:33 UTC (rev 18334)
+++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java	2014-09-15 09:25:03 UTC (rev 18335)
@@ -39,8 +39,9 @@
   private JsonParser jsonParser;
   private MappingIterator<JsonNode> iterator;
   private List<String> contentKeys, featureKeys;
-  private boolean nested;
+  private boolean nested, hasNextNode;
   private Iterator<JsonNode> nestedStatuses;
+  private JsonNode nextNode; 
 
 
   public TweetStreamIterator(InputStream input, List<String> contentKeys, 
@@ -70,13 +71,20 @@
     iterator = objectMapper.readValues(jsonParser, JsonNode.class);
     this.nested = false;
     this.nestedStatuses = null;
+    this.hasNextNode = this.iterator.hasNext();
+    if (this.hasNextNode) {
+      this.nextNode = this.iterator.next();
+    }
   }
 
   
   @Override
   public boolean hasNext() {
-    // Should that be iterator.hasNextValue() ?
-    return this.iterator.hasNext() || 
+    /* Using this.iterator.hasNext() did not work for search result format, because
+     * it returns true if there is a JSON node with an empty statuses array.  So we 
+     * have to read ahead a bit in order to let the loop in Population *not* run in
+     * that case (so we can suppress the empty document).  */
+    return (this.hasNextNode && nonEmpty(this.nextNode)) || 
             (this.nested && (this.nestedStatuses != null) && this.nestedStatuses.hasNext());
     // Belt & braces: this.nested should suffice.
   }
@@ -94,11 +102,9 @@
         this.nested = this.nestedStatuses.hasNext();
       }
       
-      else if (iterator.hasNextValue()) {
-        JsonNode json = iterator.nextValue();
-        
-        if (isSearchResultList(json)) {
-          this.nestedStatuses = getStatuses(json).iterator();
+      else if (this.hasNext()) {
+        if (isSearchResultList(this.nextNode)) {
+          this.nestedStatuses = getStatuses(this.nextNode).iterator();
           this.nested = this.nestedStatuses.hasNext();
           // Set the nested flag according as there is anything left
           // in thee statuses value array (which could be empty).
@@ -112,9 +118,14 @@
           this.nested = this.nestedStatuses.hasNext();
         }
         else {
-          result = Tweet.readTweet(json, contentKeys, featureKeys);
+          result = Tweet.readTweet(this.nextNode, contentKeys, featureKeys);
         }
       }
+      
+      if (! this.nested) {
+        hasNextNode = this.iterator.hasNext();
+        nextNode = hasNextNode ? this.iterator.next() : null;
+      }
     }
     catch (IOException e) {
       logger.warn("Internal error in TweetStreamIterator", e);
@@ -149,5 +160,21 @@
   }
   
   
+  public static boolean nonEmpty(JsonNode json) {
+    boolean result = false;
+    if (isSearchResultList(json)) {
+      try {
+        result = (getStatuses(json).size() > 0);
+      }
+      catch (IOException e) {
+        logger.warn("Internal error in TweetStreamIterator", e);
+      }
+    }
+    else {
+      result = true;
+    }
+    return result;
+  }
   
+    
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Want excitement?
Manually upgrade your production database.
When you want reliability, choose Perforce
Perforce version control. Predictably reliable.
http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to