Revision: 18319
          http://sourceforge.net/p/gate/code/18319
Author:   adamfunk
Date:     2014-09-11 19:51:08 +0000 (Thu, 11 Sep 2014)
Log Message:
-----------
WIP

Added Paths:
-----------
    
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
    
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java

Added: 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
===================================================================
--- 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
                             (rev 0)
+++ 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
     2014-09-11 19:51:08 UTC (rev 18319)
@@ -0,0 +1,35 @@
+/*
+ *  Copyright (c) 1995-2014, The University of Sheffield. See the file
+ *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ *  This file is part of GATE (see http://gate.ac.uk/), and is free
+ *  software, licenced under the GNU Library General Public License,
+ *  Version 2, June 1991 (in the distribution as file licence.html,
+ *  and also available at http://gate.ac.uk/gate/licence.html).
+ *  
+ *  $Id$
+ */
+package gate.corpora.twitter;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.List;
+
+/**
+ * {@link Iterable} wrapper around {@link TweetStreamIterator}, so that the
+ * tweets in a JSON stream can be consumed with a for-each loop.
+ * <p>
+ * Every call to {@link #iterator()} builds a new iterator over the same
+ * underlying stream object; the stream is neither rewound nor reopened here,
+ * so a second iterator continues from wherever the stream currently is.
+ *
+ * @author adam
+ */
+public class TweetStreamIterable implements Iterable<Tweet> {
+
+  InputStream input;
+  private final List<String> contentKeys;
+  private final List<String> featureKeys;
+  private final boolean gzip;
+
+  /**
+   * Wraps a stream without specifying content or feature keys and without
+   * compression.
+   * <p>
+   * NOTE(review): the key lists are handed to the iterator as {@code null};
+   * confirm that {@code Tweet.readTweet} accepts null key lists.
+   *
+   * @param input the stream of JSON tweets to iterate over
+   */
+  public TweetStreamIterable(InputStream input) {
+    this(input, null, null, false);
+  }
+
+  /**
+   * Wraps a stream, recording the settings that are forwarded unchanged to
+   * each {@link TweetStreamIterator} created by {@link #iterator()}.
+   *
+   * @param input the stream of JSON tweets to iterate over
+   * @param contentKeys forwarded to the iterator's constructor
+   * @param featureKeys forwarded to the iterator's constructor
+   * @param gzip forwarded to the iterator's constructor
+   */
+  public TweetStreamIterable(InputStream input, List<String> contentKeys,
+          List<String> featureKeys, boolean gzip) {
+    this.input = input;
+    this.contentKeys = contentKeys;
+    this.featureKeys = featureKeys;
+    this.gzip = gzip;
+  }
+
+  /**
+   * Creates a new iterator over the wrapped stream.
+   *
+   * @return an iterator producing one {@link Tweet} per JSON value
+   * @throws IllegalStateException if the iterator's constructor reports an
+   *           I/O or JSON parsing problem while opening the stream
+   */
+  @Override
+  public Iterator<Tweet> iterator() {
+    try {
+      return new TweetStreamIterator(input, contentKeys, featureKeys, gzip);
+    }
+    catch (IOException e) {
+      // Iterable.iterator() cannot declare checked exceptions, so rethrow
+      // unchecked and keep the original failure as the cause.
+      throw new IllegalStateException(
+              "Could not start reading tweets from the input stream", e);
+    }
+  }
+
+}


Property changes on: 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterable.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
Added: 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
===================================================================
--- 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
                             (rev 0)
+++ 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
     2014-09-11 19:51:08 UTC (rev 18319)
@@ -0,0 +1,112 @@
+/*
+ *  Copyright (c) 1995-2014, The University of Sheffield. See the file
+ *  COPYRIGHT.txt in the software or at http://gate.ac.uk/gate/COPYRIGHT.txt
+ *
+ *  This file is part of GATE (see http://gate.ac.uk/), and is free
+ *  software, licenced under the GNU Library General Public License,
+ *  Version 2, June 1991 (in the distribution as file licence.html,
+ *  and also available at http://gate.ac.uk/gate/licence.html).
+ *  
+ *  $Id$
+ */
+package gate.corpora.twitter;
+
+import gate.Document;
+import gate.Factory;
+import gate.FeatureMap;
+
+import java.io.IOException;
+import java.io.InputStream;
+import java.util.Iterator;
+import java.util.List;
+
+import com.fasterxml.jackson.core.JsonParseException;
+import com.fasterxml.jackson.core.JsonParser;
+import com.fasterxml.jackson.core.JsonParser.Feature;
+import com.fasterxml.jackson.core.JsonPointer;
+import com.fasterxml.jackson.core.JsonToken;
+import com.fasterxml.jackson.databind.JsonNode;
+import com.fasterxml.jackson.databind.MappingIterator;
+import com.fasterxml.jackson.databind.ObjectMapper;
+
+/**
+ * Iterates over the tweets in a JSON input stream, reading one JSON value at
+ * a time with Jackson and converting each into a {@link Tweet} via
+ * {@code Tweet.readTweet}.
+ * <p>
+ * The stream may contain either a plain sequence of JSON objects or a single
+ * top-level JSON array of objects.
+ */
+public class TweetStreamIterator implements Iterator<Tweet> {
+
+  // Borrowed from gcp IOConstants
+  public static final String ID_POINTER = "/id_str";
+
+  private final ObjectMapper objectMapper;
+  private final JsonParser jsonParser;
+  private final MappingIterator<JsonNode> iterator;
+  private final boolean gzip;
+  private final List<String> contentKeys, featureKeys;
+  // Compiled form of ID_POINTER; not used by this class itself but kept
+  // available (protected) for ID checks.
+  protected JsonPointer idPointer;
+
+
+  /**
+   * Opens a JSON parser on the given stream and prepares to iterate over
+   * the JSON values it contains.
+   *
+   * @param input the stream of JSON tweets; must not be null
+   * @param contentKeys passed through to {@code Tweet.readTweet}
+   * @param featureKeys passed through to {@code Tweet.readTweet}
+   * @param gzip must be {@code false}; compressed input is not yet supported
+   * @throws IllegalArgumentException if {@code input} is null or
+   *           {@code gzip} is true
+   * @throws JsonParseException if the start of the stream is not valid JSON
+   * @throws IOException if the stream cannot be read
+   */
+  public TweetStreamIterator(InputStream input, List<String> contentKeys,
+          List<String> featureKeys, boolean gzip)
+          throws JsonParseException, IOException {
+    // Validate the arguments before touching any state.
+    if (gzip) {
+      throw new IllegalArgumentException("gzip not yet supported!");
+    }
+    // TODO support compression
+    if (input == null) {
+      throw new IllegalArgumentException("input stream must not be null");
+    }
+
+    this.contentKeys = contentKeys;
+    this.featureKeys = featureKeys;
+    this.gzip = gzip;
+
+    // Following borrowed from gcp JSONStreamingInputHandler
+    idPointer = JsonPointer.compile(ID_POINTER);
+    objectMapper = new ObjectMapper();
+    jsonParser = objectMapper.getFactory().createParser(input)
+            .enable(Feature.AUTO_CLOSE_SOURCE);
+    // If the first token in the stream is the start of an array ("[") then
+    // assume the stream as a whole is an array of objects, one per document.
+    // To handle this, simply clear the token - the MappingIterator returned
+    // by readValues will cope with the rest in either form.
+    if (jsonParser.nextToken() == JsonToken.START_ARRAY) {
+      jsonParser.clearCurrentToken();
+    }
+    iterator = objectMapper.readValues(jsonParser, JsonNode.class);
+  }
+
+
+  /**
+   * Reports whether another JSON value is available, delegating to the
+   * underlying Jackson {@code MappingIterator}.
+   *
+   * @return true if {@link #next()} can produce another tweet
+   */
+  @Override
+  public boolean hasNext() {
+    // hasNext() rather than hasNextValue(): the latter declares a checked
+    // IOException, which Iterator.hasNext() cannot propagate.
+    return iterator.hasNext();
+  }
+
+  /**
+   * Reads the next JSON value from the stream and converts it to a tweet.
+   *
+   * @return the result of {@code Tweet.readTweet} for the next JSON value
+   * @throws java.util.NoSuchElementException if the stream is exhausted
+   * @throws IllegalStateException if reading or parsing the next value fails
+   */
+  @Override
+  public Tweet next() {
+    try {
+      if (!iterator.hasNextValue()) {
+        throw new java.util.NoSuchElementException(
+                "No more tweets in the stream");
+      }
+      JsonNode json = iterator.nextValue();
+      return Tweet.readTweet(json, contentKeys, featureKeys);
+    }
+    catch (IOException e) {
+      // Do not hide the failure behind a null element: callers looping over
+      // this iterator would otherwise fail later with no cause attached.
+      throw new IllegalStateException(
+              "Failed to read the next tweet from the stream", e);
+    }
+  }
+
+  /**
+   * Removal is not supported: the tweets come from a read-only stream.
+   *
+   * @throws UnsupportedOperationException always
+   */
+  @Override
+  public void remove() {
+    throw new UnsupportedOperationException(
+            "Cannot remove tweets from an input stream");
+  }
+
+
+  /**
+   * Closes the JSON parser. Because the parser was created with
+   * {@code AUTO_CLOSE_SOURCE} enabled, this also closes the input stream.
+   *
+   * @throws IllegalStateException if closing the parser fails
+   */
+  public void close() {
+    try {
+      jsonParser.close();
+    }
+    catch (IOException e) {
+      throw new IllegalStateException("Failed to close the tweet stream", e);
+    }
+  }
+
+}


Property changes on: 
gate/branches/twitter-pop-dev/plugins/Twitter/src/gate/corpora/twitter/TweetStreamIterator.java
___________________________________________________________________
Added: svn:keywords
## -0,0 +1 ##
+Id
\ No newline at end of property
Added: svn:eol-style
## -0,0 +1 ##
+native
\ No newline at end of property
This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Want excitement?
Manually upgrade your production database.
When you want reliability, choose Perforce
Perforce version control. Predictably reliable.
http://pubads.g.doubleclick.net/gampad/clk?id=157508191&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to