Revision: 18335 http://sourceforge.net/p/gate/code/18335 Author: adamfunk Date: 2014-09-15 09:25:03 +0000 (Mon, 15 Sep 2014) Log Message: ----------- Fixed the hasNext functionality so that we can detect and delete the empty document when the statuses array contains 0 tweets.
Modified Paths: -------------- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java Modified: gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java =================================================================== --- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java 2014-09-15 01:20:33 UTC (rev 18334) +++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/Population.java 2014-09-15 09:25:03 UTC (rev 18335) @@ -102,9 +102,6 @@ tweetCounter++; } // end of Tweet loop - System.out.println("CL = " + content.length()); - System.out.println("TC = " + tweetCounter); - if (content.length() > 0) { closeDocument(document, content, annotandaOffsets, corpus); } Modified: gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java =================================================================== --- gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java 2014-09-15 01:20:33 UTC (rev 18334) +++ gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java 2014-09-15 09:25:03 UTC (rev 18335) @@ -39,8 +39,9 @@ private JsonParser jsonParser; private MappingIterator<JsonNode> iterator; private List<String> contentKeys, featureKeys; - private boolean nested; + private boolean nested, hasNextNode; private Iterator<JsonNode> nestedStatuses; + private JsonNode nextNode; public TweetStreamIterator(InputStream input, List<String> contentKeys, @@ -70,13 +71,20 @@ iterator = objectMapper.readValues(jsonParser, JsonNode.class); this.nested = false; this.nestedStatuses = null; + this.hasNextNode = this.iterator.hasNext(); + if (this.hasNextNode) { + this.nextNode = this.iterator.next(); + } } @Override public boolean hasNext() { - // Should that be 
iterator.hasNextValue() ? - return this.iterator.hasNext() || + /* Using this.iterator.hasNext() did not work for search result format, because + * it returns true if there is a JSON node with an empty statuses array. So we + * have to read ahead a bit in order to let the loop in Population *not* run in + * that case (so we can suppress the empty document). */ + return (this.hasNextNode && nonEmpty(this.nextNode)) || (this.nested && (this.nestedStatuses != null) && this.nestedStatuses.hasNext()); // Belt & braces: this.nested should suffice. } @@ -94,11 +102,9 @@ this.nested = this.nestedStatuses.hasNext(); } - else if (iterator.hasNextValue()) { - JsonNode json = iterator.nextValue(); - - if (isSearchResultList(json)) { - this.nestedStatuses = getStatuses(json).iterator(); + else if (this.hasNext()) { + if (isSearchResultList(this.nextNode)) { + this.nestedStatuses = getStatuses(this.nextNode).iterator(); this.nested = this.nestedStatuses.hasNext(); // Set the nested flag according as there is anything left // in thee statuses value array (which could be empty). @@ -112,9 +118,14 @@ this.nested = this.nestedStatuses.hasNext(); } else { - result = Tweet.readTweet(json, contentKeys, featureKeys); + result = Tweet.readTweet(this.nextNode, contentKeys, featureKeys); } } + + if (! this.nested) { + hasNextNode = this.iterator.hasNext(); + nextNode = hasNextNode ? this.iterator.next() : null; + } } catch (IOException e) { logger.warn("Internal error in TweetStreamIterator", e); @@ -149,5 +160,21 @@ } + public static boolean nonEmpty(JsonNode json) { + boolean result = false; + if (isSearchResultList(json)) { + try { + result = (getStatuses(json).size() > 0); + } + catch (IOException e) { + logger.warn("Internal error in TweetStreamIterator", e); + } + } + else { + result = true; + } + return result; + } + } This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. 
------------------------------------------------------------------------------ Want excitement? Manually upgrade your production database. When you want reliability, choose Perforce. Perforce version control. Predictably reliable. http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs