Revision: 18443 http://sourceforge.net/p/gate/code/18443 Author: ian_roberts Date: 2014-11-07 17:06:12 +0000 (Fri, 07 Nov 2014) Log Message: ----------- On second thoughts, remove the option to exclude the text from serialized JSON, as it opens all sorts of cans of worms when it comes to escaping &, < and > the way Twitter does (if there's no text in the JSON, should the annotation offsets still be adjusted as if the text had been re-escaped?).
Modified Paths: -------------- gate/trunk/plugins/Twitter/src/gate/corpora/export/GATEJsonExporter.java gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java Modified: gate/trunk/plugins/Twitter/src/gate/corpora/export/GATEJsonExporter.java =================================================================== --- gate/trunk/plugins/Twitter/src/gate/corpora/export/GATEJsonExporter.java 2014-11-07 16:48:19 UTC (rev 18442) +++ gate/trunk/plugins/Twitter/src/gate/corpora/export/GATEJsonExporter.java 2014-11-07 17:06:12 UTC (rev 18443) @@ -35,7 +35,6 @@ import java.io.OutputStream; import java.util.Collection; import java.util.HashMap; -import java.util.HashSet; import java.util.Iterator; import java.util.LinkedHashMap; import java.util.Map; @@ -90,16 +89,6 @@ * No-op, exists only as a host for the parameter annotations. */ @RunTime - @CreoleParameter(defaultValue = "true", comment = "Whether " + - "to include the document text as a \"text\" property in " + - "the output JSON") - public void setIncludeText(Boolean include) {} - public Boolean getIncludeText() { return null; } - - /** - * No-op, exists only as a host for the parameter annotations. - */ - @RunTime @Optional @CreoleParameter(defaultValue = GateConstants.ORIGINAL_MARKUPS_ANNOT_SET_NAME, comment = "Annotation set in which the \"document " + @@ -129,7 +118,6 @@ super("GATE JSON", "json","application/json"); } - @SuppressWarnings("unchecked") @Override public void export(Document doc, OutputStream out, FeatureMap options) throws IOException { @@ -187,11 +175,9 @@ AnnotationSet defaultEntitiesAS = doc.getAnnotations((String)options.get("entitiesAnnotationSetName")); + @SuppressWarnings("unchecked") Collection<String> types = (Collection<String>)options.get("annotationTypes"); - boolean includeText = (options.containsKey("includeText") - ? 
((Boolean)options.get("includeText")).booleanValue() : true); - Map<String,Collection<Annotation>> annotationsMap = new LinkedHashMap<>(); for (String type : types) { @@ -215,7 +201,7 @@ for(Map.Entry<String, Collection<Annotation>> entry : annotationsMap.entrySet()) { sortedAnnots.put(entry.getKey(), Utils.inDocumentOrder((AnnotationSet)entry.getValue())); } - DocumentJsonUtils.writeDocument(doc, 0L, Utils.end(doc), sortedAnnots, null, null, includeText, generator); + DocumentJsonUtils.writeDocument(doc, 0L, Utils.end(doc), sortedAnnots, null, null, generator); } else { for(Annotation docAnnot : Utils.inDocumentOrder(docAnnots)) { Map<String, Collection<Annotation>> coveredAnnotations = new HashMap<>(); @@ -225,7 +211,7 @@ Utils.start(docAnnot), Utils.end(docAnnot)))); } DocumentJsonUtils.writeDocument(doc, Utils.start(docAnnot), Utils.end(docAnnot), - coveredAnnotations, docAnnot.getFeatures(), null, includeText, generator); + coveredAnnotations, docAnnot.getFeatures(), null, generator); } } } catch(InvalidOffsetException e) { Modified: gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java =================================================================== --- gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java 2014-11-07 16:48:19 UTC (rev 18442) +++ gate/trunk/src/main/gate/corpora/DocumentJsonUtils.java 2014-11-07 17:06:12 UTC (rev 18443) @@ -229,7 +229,7 @@ public static void writeDocument(Document doc, Long start, Long end, Map<String, Collection<Annotation>> annotationsMap, JsonGenerator json) throws JsonGenerationException, IOException, InvalidOffsetException { - writeDocument(doc, start, end, annotationsMap, null, null, true, json); + writeDocument(doc, start, end, annotationsMap, null, null, json); } /** @@ -259,7 +259,7 @@ Map<String, Collection<Annotation>> annotationsMap, Map<?, ?> extraFeatures, JsonGenerator json) throws JsonGenerationException, IOException, InvalidOffsetException { - writeDocument(doc, start, end, annotationsMap, 
extraFeatures, null, true, json); + writeDocument(doc, start, end, annotationsMap, extraFeatures, null, json); } /** @@ -292,49 +292,11 @@ Map<?, ?> extraFeatures, String annotationTypeProperty, JsonGenerator json) throws JsonGenerationException, IOException, InvalidOffsetException { - writeDocument(doc, start, end, annotationsMap, extraFeatures, - annotationTypeProperty, true, json); - } - - /** - * Write a substring of a GATE document to the specified - * JsonGenerator. The specified window of document text will - * optionally be written as a property named "text" and the specified - * annotations will be written as "entities", with their offsets - * adjusted to be relative to the specified window. - * - * @param doc the document to write - * @param start the start offset of the segment to write - * @param end the end offset of the segment to write - * @param extraFeatures additional properties to add to the generated - * JSON. If the map includes a "text" key this will be - * ignored, and if it contains a key "entities" whose value - * is a map then these entities will be merged with the - * generated ones derived from the annotationsMap. This would - * typically be used for documents that were originally - * derived from Twitter data, to re-create the original JSON. - * @param annotationTypeProperty if non-null, the annotation type will - * be written as a property under this name, as if it were an - * additional feature of each annotation. - * @param includeText should the "text" property be included? - * @param json the {@link JsonGenerator} to write to. - * @throws JsonGenerationException if a problem occurs while - * generating the JSON - * @throws IOException if an I/O error occurs. 
- */ - public static void writeDocument(Document doc, Long start, Long end, - Map<String, Collection<Annotation>> annotationsMap, - Map<?, ?> extraFeatures, String annotationTypeProperty, - boolean includeText, JsonGenerator json) throws JsonGenerationException, IOException, - InvalidOffsetException { - ObjectWriter writer = MAPPER.writer(); json.writeStartObject(); - if(includeText) { - json.writeStringField("text", doc.getContent().getContent(start, end) - .toString()); - } + json.writeStringField("text", doc.getContent().getContent(start, end) + .toString()); json.writeFieldName("entities"); json.writeStartObject(); // if the extraFeatures already includes entities, merge them with This was sent by the SourceForge.net collaborative development platform, the world's largest Open Source development site. ------------------------------------------------------------------------------ _______________________________________________ GATE-cvs mailing list GATE-cvs@lists.sourceforge.net https://lists.sourceforge.net/lists/listinfo/gate-cvs