Revision: 18401
          http://sourceforge.net/p/gate/code/18401
Author:   ian_roberts
Date:     2014-10-21 18:34:59 +0000 (Tue, 21 Oct 2014)
Log Message:
-----------
Added option to *not* include the "text" property when saving annotations as 
JSON.  This gives us a format we can use to distribute annotations against 
tweets without including the tweet text (just the ID) to comply with Twitter's 
terms.

Modified Paths:
--------------
    gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java

Modified: gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java
===================================================================
--- gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java     2014-10-21 14:36:36 UTC (rev 18400)
+++ gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java     2014-10-21 18:34:59 UTC (rev 18401)
@@ -229,7 +229,7 @@
   public static void writeDocument(Document doc, Long start, Long end,
           Map<String, Collection<Annotation>> annotationsMap, JsonGenerator json)
           throws JsonGenerationException, IOException, InvalidOffsetException {
-    writeDocument(doc, start, end, annotationsMap, null, null, json);
+    writeDocument(doc, start, end, annotationsMap, null, null, true, json);
   }
 
   /**
@@ -259,7 +259,7 @@
           Map<String, Collection<Annotation>> annotationsMap,
           Map<?, ?> extraFeatures, JsonGenerator json)
           throws JsonGenerationException, IOException, InvalidOffsetException {
-    writeDocument(doc, start, end, annotationsMap, extraFeatures, null, json);
+    writeDocument(doc, start, end, annotationsMap, extraFeatures, null, true, json);
   }
 
   /**
@@ -292,12 +292,49 @@
           Map<?, ?> extraFeatures, String annotationTypeProperty,
           JsonGenerator json) throws JsonGenerationException, IOException,
           InvalidOffsetException {
+    writeDocument(doc, start, end, annotationsMap, extraFeatures,
+            annotationTypeProperty, true, json);
+  }
 
+  /**
+   * Write a substring of a GATE document to the specified
+   * JsonGenerator. The specified window of document text will
+   * optionally be written as a property named "text" and the specified
+   * annotations will be written as "entities", with their offsets
+   * adjusted to be relative to the specified window.
+   * 
+   * @param doc the document to write
+   * @param start the start offset of the segment to write
+   * @param end the end offset of the segment to write
+   * @param extraFeatures additional properties to add to the generated
+   *          JSON. If the map includes a "text" key this will be
+   *          ignored, and if it contains a key "entities" whose value
+   *          is a map then these entities will be merged with the
+   *          generated ones derived from the annotationsMap. This would
+   *          typically be used for documents that were originally
+   *          derived from Twitter data, to re-create the original JSON.
+   * @param annotationTypeProperty if non-null, the annotation type will
+   *          be written as a property under this name, as if it were an
+   *          additional feature of each annotation.
+   * @param includeText should the "text" property be included?
+   * @param json the {@link JsonGenerator} to write to.
+   * @throws JsonGenerationException if a problem occurs while
+   *           generating the JSON
+   * @throws IOException if an I/O error occurs.
+   */
+  public static void writeDocument(Document doc, Long start, Long end,
+          Map<String, Collection<Annotation>> annotationsMap,
+          Map<?, ?> extraFeatures, String annotationTypeProperty,
+          boolean includeText, JsonGenerator json) throws JsonGenerationException, IOException,
+          InvalidOffsetException {
+
     ObjectWriter writer = MAPPER.writer();
 
     json.writeStartObject();
-    json.writeStringField("text", doc.getContent().getContent(start, end)
-            .toString());
+    if(includeText) {
+      json.writeStringField("text", doc.getContent().getContent(start, end)
+              .toString());
+    }
     json.writeFieldName("entities");
     json.writeStartObject();
     // if the extraFeatures already includes entities, merge them with

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
Comprehensive Server Monitoring with Site24x7.
Monitor 10 servers for $9/Month.
Get alerted through email, SMS, voice calls or mobile push notifications.
Take corrective actions from your mobile device.
http://p.sf.net/sfu/Zoho
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to