This is an automated email from the ASF dual-hosted git repository. schor pushed a commit to branch enhancement/UIMA-6128-xml-1-1 in repository https://gitbox.apache.org/repos/asf/uima-uimaj.git
commit ddd62f661e4757af15a73700b899ad05bd27c366 Author: Marshall Schor <m...@schor.com> AuthorDate: Thu Jan 16 17:14:44 2020 -0500 [UIMA-6128] add XMI_1_1 and XCAS_1_1 as SerialFormat entries, and support serializing using these. --- .../java/org/apache/uima/cas/SerialFormat.java | 11 ++++ .../org/apache/uima/cas/impl/XCASSerializer.java | 27 +++++++++- .../org/apache/uima/cas/impl/XmiCasSerializer.java | 59 +++++++++++++++++++++- .../main/java/org/apache/uima/util/CasIOUtils.java | 6 ++- .../apache/uima/cas/impl/XmiCasSerializerTest.java | 8 ++- 5 files changed, 105 insertions(+), 6 deletions(-) diff --git a/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java b/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java index 8d84809..2478b8a 100644 --- a/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java +++ b/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java @@ -96,6 +96,17 @@ public enum SerialFormat { * used to reinitialize the CAS */ COMPRESSED_TSI("bcas"), + + /** + * XML-serialized CAS, using xml version 1.1 + */ + XCAS_1_1("xcas"), + + /** + * XML-serialized CAS, using xml version 1.1 + */ + XMI_1_1("xmi"), + ; private String defaultFileExtension; diff --git a/uimaj-core/src/main/java/org/apache/uima/cas/impl/XCASSerializer.java b/uimaj-core/src/main/java/org/apache/uima/cas/impl/XCASSerializer.java index e972675..e8acdd3 100644 --- a/uimaj-core/src/main/java/org/apache/uima/cas/impl/XCASSerializer.java +++ b/uimaj-core/src/main/java/org/apache/uima/cas/impl/XCASSerializer.java @@ -26,6 +26,8 @@ import java.util.List; import java.util.Map; import java.util.Vector; +import javax.xml.transform.OutputKeys; + import org.apache.uima.UimaContext; import org.apache.uima.cas.CAS; import org.apache.uima.cas.Feature; @@ -975,9 +977,32 @@ public class XCASSerializer { */ public static void serialize(CAS aCAS, OutputStream aStream, boolean isFormattedOutput) throws SAXException, IOException { + serialize(aCAS, aStream, 
isFormattedOutput, false); + } + + /** + * Serializes an XCAS to a stream. + * + * @param aCAS + * CAS to serialize. + * @param aStream + * output stream to which to write the XCAS XML document + * @param isFormattedOutput + * if true the XCAS will be serialized formatted * + * @param useXml_1_1 + * if true, the output serializer is set with the OutputKeys.VERSION to "1.1". + * @throws SAXException + * if a problem occurs during XCAS serialization + * @throws IOException + * if an I/O failure occurs + */ + public static void serialize(CAS aCAS, OutputStream aStream, boolean isFormattedOutput, boolean useXml_1_1) + throws SAXException, IOException { XCASSerializer xcasSerializer = new XCASSerializer(aCAS.getTypeSystem()); XMLSerializer sax2xml = new XMLSerializer(aStream, isFormattedOutput); + if (useXml_1_1) { + sax2xml.setOutputProperty(OutputKeys.VERSION,"1.1"); + } xcasSerializer.serialize(aCAS, sax2xml.getContentHandler()); } - } diff --git a/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java b/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java index e568509..07c8699 100644 --- a/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java +++ b/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasSerializer.java @@ -25,6 +25,10 @@ import java.util.Iterator; import java.util.List; import java.util.Map; +import javax.xml.parsers.ParserConfigurationException; +import javax.xml.parsers.SAXParserFactory; +import javax.xml.transform.OutputKeys; + import org.apache.uima.UIMAFramework; import org.apache.uima.UIMARuntimeException; import org.apache.uima.UimaContext; @@ -46,6 +50,8 @@ import org.xml.sax.Attributes; import org.xml.sax.ContentHandler; import org.xml.sax.ErrorHandler; import org.xml.sax.SAXException; +import org.xml.sax.SAXNotRecognizedException; +import org.xml.sax.SAXNotSupportedException; import org.xml.sax.helpers.AttributesImpl; /** @@ -119,7 +125,19 @@ public class XmiCasSerializer { /** 
Namespace URI to use for UIMA types that have no namespace (the "default pacakge" in Java) */ public static final String DEFAULT_NAMESPACE_URI = "http:///uima/noNamespace.ecore"; - + +// public final static boolean XML1_1_SUPPORTED; // assuming xml 1.1 is always supported in today's (2020) java's +// static { +// boolean v; +// try { +// v = SAXParserFactory.newInstance().getFeature("http://xml.org/sax/features/xml-1.1"); +// } catch (SAXNotRecognizedException | SAXNotSupportedException +// | ParserConfigurationException e) { +// v = false; +// } +// XML1_1_SUPPORTED = v; +// } + public final static String SYSTEM_LINE_FEED; static { String lf = System.getProperty("line.separator"); @@ -322,10 +340,47 @@ public class XmiCasSerializer { public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream, boolean aPrettyPrint, XmiSerializationSharedData aSharedData, Marker aMarker) throws SAXException { + serialize(aCAS, aTargetTypeSystem, aStream, aPrettyPrint, aSharedData, aMarker, false); + } + + /** + * Serializes a Delta CAS to an XMI stream. This version of this method allows many options to be configured. + * + * + * @param aCAS + * CAS to serialize. + * @param aTargetTypeSystem + * type system to which the produced XMI will conform. Any types or features not in the + * target type system will not be serialized. A null value indicates that all types and features + * will be serialized. + * @param aStream + * output stream to which to write the XMI document + * @param aPrettyPrint + * if true the XML output will be formatted with newlines and indenting. If false it will be unformatted. + * @param aSharedData + * an optional container for data that is shared between the {@link XmiCasSerializer} and the {@link XmiCasDeserializer}. + * See the JavaDocs for {@link XmiSerializationSharedData} for details. 
+ * @param aMarker + * an optional object that is used to filter and serialize a Delta CAS containing only + * those FSs and Views created after Marker was set and preexisting FSs and views that were modified. + * See the JavaDocs for {@link Marker} for details. + * @param useXml_1_1 + * if true, the output serializer is set with the OutputKeys.VERSION to "1.1". + * @throws SAXException + * if a problem occurs during XMI serialization + */ + public static void serialize(CAS aCAS, TypeSystem aTargetTypeSystem, OutputStream aStream, boolean aPrettyPrint, + XmiSerializationSharedData aSharedData, Marker aMarker, boolean useXml_1_1) + throws SAXException { XmiCasSerializer xmiCasSerializer = new XmiCasSerializer(aTargetTypeSystem); XMLSerializer sax2xml = new XMLSerializer(aStream, aPrettyPrint); + if (useXml_1_1) { + sax2xml.setOutputProperty(OutputKeys.VERSION,"1.1"); + } xmiCasSerializer.serialize(aCAS, sax2xml.getContentHandler(), null, aSharedData, aMarker); - } + } + + /*************************************************** * non-static XMI Serializer methods * diff --git a/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java b/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java index 1e5e6b2..9447830 100644 --- a/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java +++ b/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java @@ -30,7 +30,6 @@ import java.io.ObjectInputStream; import java.io.ObjectOutputStream; import java.io.OutputStream; import java.net.URL; -import java.util.Arrays; import org.apache.uima.UIMARuntimeException; import org.apache.uima.cas.CAS; @@ -57,7 +56,7 @@ import org.xml.sax.SAXException; * <ul> * <li>save and load CASes, and to</li> * <li>optionally include the CAS's Type System (abbreviated TS (only available for Compressed Form 6)) and optionally also include the CAS's indexes definition.</li> - * <li>The combinaton of Type System and Indexes definition is called TSI. 
+ * <li>The combination of Type System and Indexes definition is called TSI. * <ul> * <li>The TSI's purpose: to replace the CAS's existing type system and index definition.</li> * <li>The TS's purpose: to specify the type system used in the serialized data for format Compressed Form 6, in order to allow deserializing into some other type system in the CAS, leniently.</li> @@ -447,6 +446,9 @@ public class CasIOUtils { case XMI: XmiCasSerializer.serialize(aCas, docOS); break; + case XMI_1_1: + XmiCasSerializer.serialize(aCas, null, docOS, false, null, null, true); + break; case XCAS: XCASSerializer.serialize(aCas, docOS, true); // true = formatted output break; diff --git a/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java b/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java index f14ecbc..fa4d491 100644 --- a/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java +++ b/uimaj-core/src/test/java/org/apache/uima/cas/impl/XmiCasSerializerTest.java @@ -31,9 +31,11 @@ import junit.framework.TestCase; import org.apache.uima.UIMAFramework; import org.apache.uima.cas.CAS; +import org.apache.uima.cas.SerialFormat; import org.apache.uima.resource.metadata.TypeSystemDescription; import org.apache.uima.test.junit_extension.JUnitExtension; import org.apache.uima.util.CasCreationUtils; +import org.apache.uima.util.CasIOUtils; import org.apache.uima.util.XMLInputSource; import org.apache.uima.util.XMLSerializer; import org.xml.sax.SAXNotRecognizedException; @@ -99,7 +101,7 @@ public class XmiCasSerializerTest extends TestCase { assertTrue("XMI serialization of document text with bad XML 1.0 char should throw exception", caughtException); - //but when XML 1.1 output is being generated, don't fail on control chracters which are valid in 1.1. + //but when XML 1.1 output is being generated, don't fail on control characters which are valid in 1.1. 
if (XML1_1_SUPPORTED) { out = new FileOutputStream(this.outputFile); try { @@ -110,6 +112,10 @@ public class XmiCasSerializerTest extends TestCase { finally { out.close(); } + + this.outputFile.delete(); + out = new FileOutputStream(this.outputFile); + CasIOUtils.save(cas, out, SerialFormat.XMI_1_1); } }