Revision: 18443
          http://sourceforge.net/p/gate/code/18443
Author:   ian_roberts
Date:     2014-11-07 17:06:12 +0000 (Fri, 07 Nov 2014)
Log Message:
-----------
On second thoughts, remove the option to exclude the text from the serialized
JSON, as it opens all sorts of cans of worms when it comes to escaping &, < and >
the way Twitter does (if there's no text in the JSON, should the annotation
offsets still be adjusted as if the text had been re-escaped?).

Modified Paths:
--------------
    gate/trunk/plugins/Twitter/src/gate/corpora/export/GATEJsonExporter.java
    gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java

Modified: 
gate/trunk/plugins/Twitter/src/gate/corpora/export/GATEJsonExporter.java
===================================================================
--- gate/trunk/plugins/Twitter/src/gate/corpora/export/GATEJsonExporter.java    
2014-11-07 16:48:19 UTC (rev 18442)
+++ gate/trunk/plugins/Twitter/src/gate/corpora/export/GATEJsonExporter.java    
2014-11-07 17:06:12 UTC (rev 18443)
@@ -35,7 +35,6 @@
 import java.io.OutputStream;
 import java.util.Collection;
 import java.util.HashMap;
-import java.util.HashSet;
 import java.util.Iterator;
 import java.util.LinkedHashMap;
 import java.util.Map;
@@ -90,16 +89,6 @@
    * No-op, exists only as a host for the parameter annotations.
    */
   @RunTime
-  @CreoleParameter(defaultValue = "true", comment = "Whether " +
-               "to include the document text as a \"text\" property in " +
-               "the output JSON")
-  public void setIncludeText(Boolean include) {}
-  public Boolean getIncludeText() { return null; }
-  
-  /**
-   * No-op, exists only as a host for the parameter annotations.
-   */
-  @RunTime
   @Optional
   @CreoleParameter(defaultValue = 
GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME,
           comment = "Annotation set in which the \"document " +
@@ -129,7 +118,6 @@
     super("GATE JSON", "json","application/json");
   }
 
-  @SuppressWarnings("unchecked")
   @Override
   public void export(Document doc, OutputStream out, FeatureMap options)
     throws IOException {
@@ -187,11 +175,9 @@
       AnnotationSet defaultEntitiesAS =
         doc.getAnnotations((String)options.get("entitiesAnnotationSetName"));
       
+      @SuppressWarnings("unchecked")
       Collection<String> types = 
(Collection<String>)options.get("annotationTypes");
       
-      boolean includeText = (options.containsKey("includeText")
-              ? ((Boolean)options.get("includeText")).booleanValue() : true);
-
       Map<String,Collection<Annotation>> annotationsMap = new 
LinkedHashMap<>();
       
       for (String type : types) {
@@ -215,7 +201,7 @@
         for(Map.Entry<String, Collection<Annotation>> entry : 
annotationsMap.entrySet()) {
           sortedAnnots.put(entry.getKey(), 
Utils.inDocumentOrder((AnnotationSet)entry.getValue()));
         }
-        DocumentJsonUtils.writeDocument(doc, 0L, Utils.end(doc), sortedAnnots, 
null, null, includeText, generator);
+        DocumentJsonUtils.writeDocument(doc, 0L, Utils.end(doc), sortedAnnots, 
null, null, generator);
       } else {
         for(Annotation docAnnot : Utils.inDocumentOrder(docAnnots)) {
           Map<String, Collection<Annotation>> coveredAnnotations = new 
HashMap<>();
@@ -225,7 +211,7 @@
                             Utils.start(docAnnot), Utils.end(docAnnot))));
           }
           DocumentJsonUtils.writeDocument(doc, Utils.start(docAnnot), 
Utils.end(docAnnot),
-                  coveredAnnotations, docAnnot.getFeatures(), null, 
includeText, generator);
+                  coveredAnnotations, docAnnot.getFeatures(), null, generator);
         }
       }
     } catch(InvalidOffsetException e) {

Modified: gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java
===================================================================
--- gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java     2014-11-07 
16:48:19 UTC (rev 18442)
+++ gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java     2014-11-07 
17:06:12 UTC (rev 18443)
@@ -229,7 +229,7 @@
   public static void writeDocument(Document doc, Long start, Long end,
           Map<String, Collection<Annotation>> annotationsMap, JsonGenerator 
json)
           throws JsonGenerationException, IOException, InvalidOffsetException {
-    writeDocument(doc, start, end, annotationsMap, null, null, true, json);
+    writeDocument(doc, start, end, annotationsMap, null, null, json);
   }
 
   /**
@@ -259,7 +259,7 @@
           Map<String, Collection<Annotation>> annotationsMap,
           Map<?, ?> extraFeatures, JsonGenerator json)
           throws JsonGenerationException, IOException, InvalidOffsetException {
-    writeDocument(doc, start, end, annotationsMap, extraFeatures, null, true, 
json);
+    writeDocument(doc, start, end, annotationsMap, extraFeatures, null, json);
   }
 
   /**
@@ -292,49 +292,11 @@
           Map<?, ?> extraFeatures, String annotationTypeProperty,
           JsonGenerator json) throws JsonGenerationException, IOException,
           InvalidOffsetException {
-    writeDocument(doc, start, end, annotationsMap, extraFeatures,
-            annotationTypeProperty, true, json);
-  }
-
-  /**
-   * Write a substring of a GATE document to the specified
-   * JsonGenerator. The specified window of document text will
-   * optionally be written as a property named "text" and the specified
-   * annotations will be written as "entities", with their offsets
-   * adjusted to be relative to the specified window.
-   * 
-   * @param doc the document to write
-   * @param start the start offset of the segment to write
-   * @param end the end offset of the segment to write
-   * @param extraFeatures additional properties to add to the generated
-   *          JSON. If the map includes a "text" key this will be
-   *          ignored, and if it contains a key "entities" whose value
-   *          is a map then these entities will be merged with the
-   *          generated ones derived from the annotationsMap. This would
-   *          typically be used for documents that were originally
-   *          derived from Twitter data, to re-create the original JSON.
-   * @param annotationTypeProperty if non-null, the annotation type will
-   *          be written as a property under this name, as if it were an
-   *          additional feature of each annotation.
-   * @param includeText should the "text" property be included?
-   * @param json the {@link JsonGenerator} to write to.
-   * @throws JsonGenerationException if a problem occurs while
-   *           generating the JSON
-   * @throws IOException if an I/O error occurs.
-   */
-  public static void writeDocument(Document doc, Long start, Long end,
-          Map<String, Collection<Annotation>> annotationsMap,
-          Map<?, ?> extraFeatures, String annotationTypeProperty,
-          boolean includeText, JsonGenerator json) throws 
JsonGenerationException, IOException,
-          InvalidOffsetException {
-
     ObjectWriter writer = MAPPER.writer();
 
     json.writeStartObject();
-    if(includeText) {
-      json.writeStringField("text", doc.getContent().getContent(start, end)
-              .toString());
-    }
+    json.writeStringField("text", doc.getContent().getContent(start, end)
+            .toString());
     json.writeFieldName("entities");
     json.writeStartObject();
     // if the extraFeatures already includes entities, merge them with

This was sent by the SourceForge.net collaborative development platform, the 
world's largest Open Source development site.


------------------------------------------------------------------------------
_______________________________________________
GATE-cvs mailing list
GATE-cvs@lists.sourceforge.net
https://lists.sourceforge.net/lists/listinfo/gate-cvs

Reply via email to