Revision: 16883
          http://sourceforge.net/p/gate/code/16883
Author:   adamfunk
Date:     2013-09-12 15:58:26 +0000 (Thu, 12 Sep 2013)
Log Message:
-----------
WIP on new Twitter population

Modified Paths:
--------------
    gate/trunk/plugins/Twitter/src/gate/corpora/JSONTweetFormat.java
    gate/trunk/plugins/Twitter/src/gate/corpora/twitter/Tweet.java
    gate/trunk/plugins/Twitter/src/gate/corpora/twitter/TweetUtils.java

Modified: gate/trunk/plugins/Twitter/src/gate/corpora/JSONTweetFormat.java
===================================================================
--- gate/trunk/plugins/Twitter/src/gate/corpora/JSONTweetFormat.java    
2013-09-12 14:33:16 UTC (rev 16882)
+++ gate/trunk/plugins/Twitter/src/gate/corpora/JSONTweetFormat.java    
2013-09-12 15:58:26 UTC (rev 16883)
@@ -27,6 +27,8 @@
 
 /** Document format for handling JSON tweets: either one 
  *  object {...} or a list [{tweet...}, {tweet...}, ...].
+ *  
+ *  This format produces one GATE document from one JSON file.
  */
 @CreoleResource(name = "GATE JSON Tweet Document Format", isPrivate = true,
     autoinstances = {@AutoInstance(hidden = true)})
@@ -35,6 +37,7 @@
   private static final long serialVersionUID = 6878020036304333918L;
 
   public static final String TEXT_ATTRIBUTE = "text";
+  public static final String TWEET_ANNOTATION_TYPE = "Tweet";
   
   /** Default construction */
   public JSONTweetFormat() { super();}
@@ -95,7 +98,9 @@
       AnnotationSet originalMarkups = 
doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
       // Create Original markups annotations for each tweet
       for (Tweet tweet : tweets) {
-        originalMarkups.add(tweet.getStart(), tweet.getEnd(), "Tweet", 
tweet.getFeatures());
+        FeatureMap features = tweet.getFeatures();
+        features.remove(TEXT_ATTRIBUTE);
+        originalMarkups.add(tweet.getStart(), tweet.getEnd(), 
TWEET_ANNOTATION_TYPE, features);
       }
     }
     catch (InvalidOffsetException e) {

Modified: gate/trunk/plugins/Twitter/src/gate/corpora/twitter/Tweet.java
===================================================================
--- gate/trunk/plugins/Twitter/src/gate/corpora/twitter/Tweet.java      
2013-09-12 14:33:16 UTC (rev 16882)
+++ gate/trunk/plugins/Twitter/src/gate/corpora/twitter/Tweet.java      
2013-09-12 15:58:26 UTC (rev 16883)
@@ -14,15 +14,12 @@
 package gate.corpora.twitter;
 
 import gate.*;
-import gate.util.DocumentFormatException;
-import gate.util.InvalidOffsetException;
+import gate.util.*;
+import gate.corpora.*;
 import gate.corpora.JSONTweetFormat;
 import java.util.*;
 import org.apache.commons.lang.StringEscapeUtils;
-import org.apache.commons.lang.StringUtils;
 import com.fasterxml.jackson.databind.JsonNode;
-import com.fasterxml.jackson.databind.ObjectMapper;
-import com.fasterxml.jackson.databind.node.ArrayNode;
 
 
 // JSON API
@@ -39,6 +36,9 @@
   private FeatureMap features;
   private long start;
   
+  public static String PATH_SEPARATOR = ":";
+  
+  
   public int getLength() {
     return this.string.length();
   }
@@ -51,6 +51,14 @@
     return this.features;
   }
   
+  public FeatureMap getFlattenedFeatures() {
+    return TweetUtils.flatten(this.features, PATH_SEPARATOR);
+  }
+  
+  public FeatureMap getFilteredFeatures(Collection<String> keepKeys) {
+    return TweetUtils.filterFeatures(this.getFlattenedFeatures(), keepKeys);
+  }
+  
   public void setStart(long start) {
     this.start = start;
   }
@@ -71,12 +79,11 @@
 
     while (keys.hasNext()) {
       String key = keys.next();
+      features.put(key.toString(), process(json.get(key)));
+
       if (key.equals(JSONTweetFormat.TEXT_ATTRIBUTE)) {
         string = StringEscapeUtils.unescapeHtml(json.get(key).asText());
       }
-      else {
-        features.put(key.toString(), process(json.get(key)));
-      }
     }
   }
   
@@ -87,7 +94,8 @@
   }
 
   
-  private Object process(JsonNode node) {
+  
+  private static Object process(JsonNode node) {
     /* JSON types: number, string, boolean, array, object (dict/map),
      * null.  All map keys are strings.
      */
@@ -131,8 +139,25 @@
   }
   
   
-  //public DocumentImpl toDocument(List<String> keepFeatures, FeatureMap 
contentItems) {
-  //}
+
+//  public Document toDocument(List<String> keepFeatures, FeatureMap 
contentItems) throws GateException {
+//    FeatureMap parameters = Factory.newFeatureMap();
+//    parameters.put(Document.DOCUMENT_STRING_CONTENT_PARAMETER_NAME, "");
+//    Document doc = (Document) 
Factory.createResource(DocumentImpl.class.getName(), parameters);
+//    //doc.setSourceUrl(sourceUrl);
+//    
+//    // this is wrong: we need various strings with content annotations over 
them
+//    DocumentContent newContent= new DocumentContentImpl(this.getString());
+//    doc.setContent(newContent);
+//    AnnotationSet originalMarkups = 
doc.getAnnotations(GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME);
+//
+//    originalMarkups.add(0L, newContent.size(), 
JSONTweetFormat.TWEET_ANNOTATION_TYPE, Factory.newFeatureMap());
+//
+//    // TODO: copy all the keepFeatures
+//    
+//    return doc;
+//  }
+
   
   
 }

Modified: gate/trunk/plugins/Twitter/src/gate/corpora/twitter/TweetUtils.java
===================================================================
--- gate/trunk/plugins/Twitter/src/gate/corpora/twitter/TweetUtils.java 
2013-09-12 14:33:16 UTC (rev 16882)
+++ gate/trunk/plugins/Twitter/src/gate/corpora/twitter/TweetUtils.java 
2013-09-12 15:58:26 UTC (rev 16883)
@@ -14,14 +14,8 @@
 package gate.corpora.twitter;
 
 import gate.*;
-import gate.creole.ResourceInstantiationException;
-import gate.util.DocumentFormatException;
-import gate.util.InvalidOffsetException;
-import gate.corpora.*;
 import java.io.IOException;
 import java.util.*;
-import org.apache.commons.lang.StringEscapeUtils;
-import org.apache.commons.lang.StringUtils;
 import com.fasterxml.jackson.databind.JsonNode;
 import com.fasterxml.jackson.databind.ObjectMapper;
 import com.fasterxml.jackson.databind.node.ArrayNode;
@@ -77,5 +71,40 @@
   }
   
   
+  
+  public static FeatureMap filterFeatures(FeatureMap source, 
Collection<String> keep) {
+    FeatureMap result = Factory.newFeatureMap();
+    for (Object key : source.keySet()) {
+      if (keep.contains(key.toString())) {
+        result.put(key, source.get(key));
+      }
+    }
+    return result;
+  }
+  
+  
+  /**
+   * Entry point for flattening: delegates to the three-argument
+   * {@code flatten} overload with an empty starting prefix.
+   *
+   * @param features the feature map to flatten
+   * @param separator forwarded unchanged to the three-argument overload
+   * @return the feature map returned by the three-argument overload
+   */
+  public static FeatureMap flatten(FeatureMap features, String separator) {
+    final String noPrefix = "";
+    return flatten(features, noPrefix, separator);
+  }
+  
+  
+  /**
+   * Recursively copies {@code map} into a single-level feature map.
+   * Each key is converted with {@code toString()} and prepended with
+   * {@code prefix}.  A value that is itself a {@code Map} is flattened
+   * recursively, with the current flat key plus {@code separator} as the
+   * new prefix; any other value is stored directly under the flat key.
+   *
+   * @param map the (possibly nested) map to flatten
+   * @param prefix text prepended to every key of {@code map}
+   * @param separator text appended to a flat key before descending into
+   *        the nested map stored under it
+   * @return a map obtained from {@code Factory.newFeatureMap()} holding the
+   *         flattened entries
+   */
+  private static FeatureMap flatten(Map<?, ?> map, String prefix, String separator) {
+    FeatureMap flattened = Factory.newFeatureMap();
 
+    for (Map.Entry<?, ?> entry : map.entrySet()) {
+      String flatKey = prefix + entry.getKey().toString();
+      // Use the value stored under this key.  Reading map.keySet() here
+      // (as before) never yields a Map, so nested maps were not descended
+      // into and every flat key ended up mapped to the key set itself.
+      Object value = entry.getValue();
+      if (value instanceof Map) {
+        flattened.putAll(flatten((Map<?, ?>) value, flatKey + separator, separator));
+      }
+      else {
+        flattened.put(flatKey, value);
+      }
+    }
+    return flattened;
+  }
+  
+
+  
+
 }

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
How ServiceNow helps IT people transform IT departments:
1. Consolidate legacy IT systems to a single system of record for IT
2. Standardize and globalize service processes across IT
3. Implement zero-touch automation to replace manual, redundant tasks
http://pubads.g.doubleclick.net/gampad/clk?id=51271111&iu=/4140/ostg.clktrk
_______________________________________________
GATE-cvs mailing list
[email protected]
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to