Author: schor
Date: Sat Aug 27 14:29:59 2016
New Revision: 1758045
URL: http://svn.apache.org/viewvc?rev=1758045&view=rev
Log:
[UIMA-4685] catchup merge from trunk
Added:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
(contents, props changed)
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
(contents, props changed)
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
(contents, props changed)
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
(contents, props changed)
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
(contents, props changed)
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
(contents, props changed)
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/resources/org/apache/uima/UIMAException_Messages.properties
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java
Sat Aug 27 14:29:59 2016
@@ -191,11 +191,17 @@ public class CASRuntimeException extends
*/
public static final String BLOB_SERIALIZATION = "BLOB_SERIALIZATION";
+ /** Unrecognized serialized CAS format. */
+ public static final String UNRECOGNIZED_SERIALIZED_CAS_FORMAT =
"UNRECOGNIZED_SERIALIZED_CAS_FORMAT";
+
/**
* Error trying to read BLOB data from an input stream and deserialize
into a CAS.
*/
public static final String BLOB_DESERIALIZATION =
"BLOB_DESERIALIZATION";
+ /** Deserializing Compressed Form 6 with CasLoadMode LENIENT, but no Type
System provided. */
+ public static final String LENIENT_FORM_6_NO_TS = "LENIENT_FORM_6_NO_TS";
+
/** Error trying to open a stream to Sofa data. */
public static final String SOFADATASTREAM_ERROR =
"SOFADATASTREAM_ERROR";
@@ -254,6 +260,9 @@ public class CASRuntimeException extends
/** While FS was in the index, illegal attempt to modify Feature "{0}" which
is used as a key in one or more indexes; FS = "{1}" */
public static final String ILLEGAL_FEAT_SET = "ILLEGAL_FEAT_SET";
+ /** Lenient deserialization not supported for input of type {0}. */
+ public static final String LENIENT_NOT_SUPPORTED = "LENIENT_NOT_SUPPORTED";
+
/** ll_setIntValue call to change the type: new type "{0}" must be a subtype
of existing type {1}.*/
public static final String ILLEGAL_TYPE_CHANGE = "ILLEGAL_TYPE_CHANGE";
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
Sat Aug 27 14:29:59 2016
@@ -23,11 +23,89 @@ package org.apache.uima.cas;
*
*/
public enum SerialFormat {
- UNKNOWN,
- XCAS, // with reachability filtering
- XMI, // with reachability filtering
- BINARY, // no filtering
- COMPRESSED, // no filtering (form 4)
- COMPRESSED_FILTERED, // with reachability and type and feature filtering
(form 6)
- COMPRESSED_PROJECTION, // with subset of views
+
+ /**
+ * Unknown format
+ */
+ UNKNOWN(""),
+
+ /**
+ * XML-serialized CAS
+ */
+ XCAS("xcas"),
+
+ /**
+ * XML-serialized CAS
+ */
+ XMI("xmi"),
+
+ /**
+ * Plain custom binary serialized CAS without type system, no filtering
+ */
+ BINARY("bcas"),
+
+ /**
+ * Binary compressed CAS without type system, no filtering (form 4)
+ */
+ COMPRESSED("bcas"),
+
+ /**
+ * Binary compressed CAS with reachability and type and feature filtering
(form 6)
+ */
+ COMPRESSED_FILTERED("bcas"),
+
+ /**
+ * with subset of views (not in use)
+ */
+ COMPRESSED_PROJECTION("bcas"),
+
+ /**
+ * Java-serialized CAS without type system
+ */
+ SERIALIZED("scas"),
+
+ /**
+ * Java-serialized CAS with type system and index definitions
+ * The Type System and Index Definition replace the CAS's when
deserializing.
+ */
+ SERIALIZED_TSI("scas"),
+
+ /**
+ * Binary compressed form 6 CAS with embedded type system
+ * representing the type system encoding the serialization
+ *
+ * specifies the type system used for the serialized form
+ */
+ COMPRESSED_FILTERED_TS("bcas"),
+
+ /**
+ * Type system and index specification included
+ * used to reinitialize the CAS and
+ * specifies the type system used for the serialized form
+ */
+ COMPRESSED_FILTERED_TSI("bcas"),
+
+ /**
+ * Plain custom binary serialized CAS, no filtering, plus serialized TSI
+ * used to reinitialize the CAS
+ */
+ BINARY_TSI("bcas"),
+
+ /**
+ * Binary Compressed Form 4, plus serialized TSI
+ * used to reinitialize the CAS
+ */
+ COMPRESSED_TSI("bcas"),
+ ;
+
+ private String defaultFileExtension;
+
+ SerialFormat(String defaultFileExtension) {
+ this.defaultFileExtension = defaultFileExtension;
+ }
+
+ public String getDefaultFileExtension() {
+ return defaultFileExtension;
+ }
+
}
Propchange:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:1690273-1757435
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
Sat Aug 27 14:29:59 2016
@@ -22,6 +22,7 @@ package org.apache.uima.cas.impl;
import java.io.DataInputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.ObjectInputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.Collection;
@@ -29,7 +30,6 @@ import java.util.Collections;
import java.util.List;
import java.util.Set;
import java.util.function.Consumer;
-import java.util.function.IntConsumer;
import java.util.function.IntFunction;
import org.apache.uima.cas.CAS;
@@ -58,6 +58,7 @@ import org.apache.uima.jcas.cas.Sofa;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasLoadMode;
/**
* Binary (mostly non compressed) CAS deserialization
@@ -244,6 +245,34 @@ public class BinaryCasSerDes {
reinitIndexedFSs(fsIndex, false, i -> csds.addr2fs.get(i));
}
+ public CASImpl setupCasFromCasMgrSerializer(CASMgrSerializer
casMgrSerializer) {
+
+ if (null != casMgrSerializer) {
+
+ TypeSystemImpl ts = casMgrSerializer.getTypeSystem();
+ baseCas.installTypeSystemInAllViews(ts);
+ baseCas.commitTypeSystem();
+
+ // reset index repositories -- wipes out Sofa index
+ baseCas.indexRepository = casMgrSerializer.getIndexRepository(baseCas);
+ baseCas.indexRepository.commit();
+
+ // get handle to existing initial View
+ CASImpl initialView = baseCas.getInitialView();
+
+ // throw away all other View information as the CAS definition may have
+ // changed
+ baseCas.svd.sofa2indexMap.clear();
+ baseCas.svd.sofaNbr2ViewMap.clear();
+ baseCas.svd.viewCount = 0;
+
+ // freshen the initial view
+ initialView.refreshView(baseCas, null);
+ baseCas.setViewForSofaNbr(1, initialView);
+ baseCas.svd.viewCount = 1;
+ }
+ return baseCas;
+ }
/**
* Deserializer for CASCompleteSerializer instances - includes type system
and index definitions
@@ -421,18 +450,77 @@ public class BinaryCasSerDes {
*/
public SerialFormat reinit(InputStream istream) throws CASRuntimeException {
-
+
final DataInputStream dis =
CommonSerDes.maybeWrapToDataInputStream(istream);
try {
Header h = CommonSerDes.readHeader(dis);
+ return reinit(h, istream, null, CasLoadMode.DEFAULT, null,
AllowPreexistingFS.allow, null);
+ } catch (IOException e) {
+ String msg = e.getMessage();
+ if (msg == null) {
+ msg = e.toString();
+ }
+ throw new CASRuntimeException(CASRuntimeException.BLOB_DESERIALIZATION,
msg);
+ }
+ }
+
+ /**
+ * ---------------------------------------------------------------------
+ * Deserialize a binary input stream, after reading the header,
+ * and optionally an externally provided type system and index spec
+ * used in compressed form 6 serialization previously
+ *
+ * This reads in and deserializes CAS data from a stream. Byte swapping may
be
+ * needed if the blob is from C++ -- C++ blob serialization writes data in
+ * native byte order.
+ *
+ * The corresponding serialization code is in
org.apache.uima.cas.impl.Serialization,
+ * also see CasIOUtils
+ *
+ * @param h -
+ * @param istream -
+ * @param casMgrSerializer null or the Java object representing the
externally supplied type
+ * and maybe indexes definition (TSI)
+ * @param casLoadMode DEFAULT or REINIT. REINIT required with compressed
form 6 to
+ * reinitialize the cas's type system and index
definition, for form 6.
+ * @param f6 only used for form 6 where an instance of BinaryCasSerDes6 has
been initialized
+ * @param allowPreexistingFS only used for form 6 delta deserialization
+ * @return -
+ * @throws CASRuntimeException wraps IOException
+ */
+ public SerialFormat reinit(Header h,
+ InputStream istream,
+ CASMgrSerializer casMgrSerializer,
+ CasLoadMode casLoadMode,
+ BinaryCasSerDes6 f6,
+ AllowPreexistingFS allowPreexistingFS,
+ TypeSystemImpl ts) throws CASRuntimeException {
+
+ final DataInputStream dis =
CommonSerDes.maybeWrapToDataInputStream(istream);
+
+ CASMgrSerializer embeddedCasMgrSerializer = maybeReadEmbeddedTSI(h, dis);
+
+ if (!h.isForm6() || casLoadMode == CasLoadMode.REINIT) {
+ setupCasFromCasMgrSerializer(
+ (null != embeddedCasMgrSerializer &&
embeddedCasMgrSerializer.hasIndexRepository())
+ ? embeddedCasMgrSerializer
+ : casMgrSerializer);
+ }
+ if (!h.isForm6() && casLoadMode == CasLoadMode.LENIENT) {
+ /**Lenient deserialization not supported for input of type {0}.*/
+ throw new CASRuntimeException(CASRuntimeException.LENIENT_NOT_SUPPORTED,
new Object[] {h.toString()});
+ }
+
+ try {
final boolean delta = h.isDelta;
if (!delta) {
baseCas.resetNoQuestions();
}
+
if (h.isCompressed) {
if (TRACE_DESER) {
System.out.format("BinDeser version = %d%n", h.v);
@@ -440,28 +528,64 @@ public class BinaryCasSerDes {
if (h.form4) {
(new BinaryCasSerDes4(baseCas.getTypeSystemImpl(), false))
.deserialize(baseCas, dis, delta, h.v);
- return SerialFormat.COMPRESSED;
+ return h.typeSystemIndexDefIncluded ? SerialFormat.COMPRESSED_TSI :
SerialFormat.COMPRESSED;
} else {
+ CASMgrSerializer cms = (embeddedCasMgrSerializer != null) ?
embeddedCasMgrSerializer : casMgrSerializer;
+ TypeSystemImpl tsRead = (cms != null) ? cms.getTypeSystem() : null;
+ if (null != tsRead) {
+ tsRead.commit(); // no generators set up
+ }
+
+ TypeSystemImpl ts_for_decoding =
+ (tsRead != null && embeddedCasMgrSerializer != null)
+ ? tsRead // first choice: embedded - it's
always correct
+ : (ts != null) // 2nd choice is passed in ts
arg, either ts or f6.getTgtTs()
+ ? ts
+ : (f6 != null && f6.getTgtTs() != null)
+ ? f6.getTgtTs() // this is the ts passed in via
BinaryCasSerDes6 constructor
+ : tsRead; // last choice: the ts read from
2nd input to load() in CasIOUtils
+
try {
- (new BinaryCasSerDes6(baseCas)).deserializeAfterVersion(dis,
delta, AllowPreexistingFS.allow);
+ BinaryCasSerDes6 bcsd = (f6 != null)
+ ? new BinaryCasSerDes6(f6, ts_for_decoding)
+ : new BinaryCasSerDes6(baseCas, ts_for_decoding);
+ bcsd.deserializeAfterVersion(dis, delta, AllowPreexistingFS.allow);
+ return h.typeSystemIndexDefIncluded
+ ? SerialFormat.COMPRESSED_FILTERED_TSI
+ : h.typeSystemIncluded
+ ? SerialFormat.COMPRESSED_FILTERED_TS
+ : SerialFormat.COMPRESSED_FILTERED;
} catch (ResourceInitializationException e) {
throw new
CASRuntimeException(CASRuntimeException.DESERIALIZING_COMPRESSED_BINARY_UNSUPPORTED,
null, e);
}
- return SerialFormat.COMPRESSED_FILTERED;
}
}
return binaryDeserialization(h);
-
} catch (IOException e) {
String msg = e.getMessage();
if (msg == null) {
msg = e.toString();
}
throw new CASRuntimeException(CASRuntimeException.BLOB_DESERIALIZATION,
msg);
+ }
+
+ }
+
+ static CASMgrSerializer maybeReadEmbeddedTSI(Header h, DataInputStream dis)
{
+ if (h.isTypeSystemIncluded() || h.isTypeSystemIndexDefIncluded()) { //
Load TS from CAS stream
+ try {
+ ObjectInputStream ois = new ObjectInputStream(dis);
+ return (CASMgrSerializer) ois.readObject();
+ } catch (ClassNotFoundException | IOException e) {
+ /**Unrecognized serialized CAS format*/
+ throw new
CASRuntimeException(CASRuntimeException.UNRECOGNIZED_SERIALIZED_CAS_FORMAT,
null, e);
+ }
}
+ return null;
}
+
/************************************************************
* ------ NON COMPRESSED BINARY DESERIALIZATION ------ *
* For corresponding serialization code, see CASSerializer *
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
Sat Aug 27 14:29:59 2016
@@ -37,7 +37,6 @@ import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
import java.util.BitSet;
-import java.util.Collections;
import java.util.List;
import java.util.function.Consumer;
import java.util.zip.Deflater;
@@ -340,12 +339,18 @@ public class BinaryCasSerDes4 implements
Serializer serializer = new Serializer(
casImpl, makeDataOutputStream(out), (MarkerImpl) trackingMark, sm,
- compressLevel, compressStrategy);
+ compressLevel, compressStrategy, false);
serializer.serialize();
return sm;
}
+ public void serializeWithTsi(CASImpl casImpl, Object out) throws IOException
{
+ Serializer serializer = new Serializer(
+ casImpl, makeDataOutputStream(out), null, null, CompressLevel.Default,
CompressStrat.Default, true);
+ serializer.serialize();
+ }
+
public SerializationMeasures serialize(AbstractCas cas, Object out, Marker
trackingMark,
CompressLevel compressLevel) throws IOException {
return serialize(cas, out,trackingMark, compressLevel,
CompressStrat.Default);
@@ -391,6 +396,7 @@ public class BinaryCasSerDes4 implements
// final private ByteHeap byteHeapObj;
final private boolean isDelta; // if true, there is a marker
indicating the start spot(s)
+ final private boolean isTsi; // true to include the type system
and indexes definition
final private boolean doMeasurement; // if true, doing measurements
// final private ComprItemRefs fsStartIndexes =
(CHANGE_FS_REFS_TO_SEQUENTIAL) ? new ComprItemRefs() : null;
// final private int[] typeCodeHisto = new int[ts.getTypeArraySize()];
@@ -460,7 +466,8 @@ public class BinaryCasSerDes4 implements
private Serializer(CASImpl cas, DataOutputStream serializedOut, MarkerImpl
mark,
SerializationMeasures sm,
CompressLevel compressLevel,
- CompressStrat compressStrategy) {
+ CompressStrat compressStrategy,
+ boolean isTsi) {
this.baseCas = cas.getBaseCAS();
this.bcsd = cas.getBinaryCasSerDes();
this.isDelta = (mark != null);
@@ -473,6 +480,7 @@ public class BinaryCasSerDes4 implements
this.sm = sm;
this.compressLevel = compressLevel;
this.compressStrategy = compressStrategy;
+ this.isTsi = isTsi;
doMeasurement = (sm != null);
@@ -532,16 +540,17 @@ public class BinaryCasSerDes4 implements
// sm.origAuxLongs = baseCas.getLongHeap().getSize() * 8;
// sm.totalTime = System.currentTimeMillis();
// }
-
+
/************************
* Write standard header
************************/
CommonSerDes.createHeader()
- .v3()
- .seqVer(2) // 0 - original, 1 - UIMA-4743, 2 - v3
- .form4()
- .delta(isDelta)
- .write(serializedOut);
+ .v3()
+ .seqVer(2) // 0 - original, 1 - UIMA-4743, 2 - v3
+ .form4()
+ .delta(isDelta)
+ .typeSystemIndexDefIncluded(isTsi)
+ .write(serializedOut);
if (TRACE_SER) System.out.println("Form4Ser start, delta: " + (isDelta ?
"true" : "false"));
/*******************************************************************************
@@ -983,14 +992,14 @@ public class BinaryCasSerDes4 implements
DeflaterOutputStream cds = new DeflaterOutputStream(baosZipped,
deflater, zipBufSize);
baos.writeTo(cds);
cds.close();
- idxAndLen.add(i);
+ idxAndLen.add(Integer.valueOf(i));
if (doMeasurement) {
- idxAndLen.add((int)(sm.statDetails[i].afterZip =
deflater.getBytesWritten()));
- idxAndLen.add((int)(sm.statDetails[i].beforeZip =
deflater.getBytesRead()));
+ idxAndLen.add(Integer.valueOf((int)(sm.statDetails[i].afterZip =
deflater.getBytesWritten())));
+ idxAndLen.add(Integer.valueOf((int)(sm.statDetails[i].beforeZip =
deflater.getBytesRead())));
sm.statDetails[i].zipTime = System.currentTimeMillis() - startTime;
} else {
- idxAndLen.add((int)deflater.getBytesWritten());
- idxAndLen.add((int)deflater.getBytesRead());
+ idxAndLen.add(Integer.valueOf((int)deflater.getBytesWritten()));
+ idxAndLen.add(Integer.valueOf((int)deflater.getBytesRead()));
}
}
}
@@ -1672,7 +1681,8 @@ public class BinaryCasSerDes4 implements
}
private void deserialize(int version1) throws IOException {
-
+ if (TRACE_DES) System.out.println("Form4Deser starting");
+
if (TRACE_DES) System.out.println("Form4Deser starting");
isBeforeV3 = (version1 & 0xff00) == 0;
Propchange:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:1690273-1757904
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
Sat Aug 27 14:29:59 2016
@@ -36,6 +36,7 @@ import java.io.FileNotFoundException;
import java.io.FileOutputStream;
import java.io.IOException;
import java.io.InputStream;
+import java.io.ObjectOutputStream;
import java.io.OutputStream;
import java.util.ArrayList;
import java.util.Arrays;
@@ -81,6 +82,8 @@ import org.apache.uima.jcas.cas.Sofa;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.cas.TOP;
import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasIOUtils;
+import org.apache.uima.util.CasLoadMode;
import org.apache.uima.util.impl.DataIO;
import org.apache.uima.util.impl.OptimizeStrings;
import org.apache.uima.util.impl.SerializationMeasures;
@@ -315,9 +318,17 @@ public class BinaryCasSerDes6 implements
private OptimizeStrings os;
private boolean only1CommonString; // true if only one common string
+ private boolean isTsIncluded; // type system used for the serialization
+ private boolean isTsiIncluded; // types plus index definition, used to
reset the cas
+
// private TypeInfo typeInfo; // type info for the current type being
serialized/deserialized
// // always the "src" typeInfo I think, except
for compareCas use
final private CasTypeSystemMapper typeMapper;
+
+ /**
+ * This is the used version of isTypeMapping, normally == to isTypeMappingCmn
+ * But compareCASes sets this false temporarily while setting up the
compare
+ */
private boolean isTypeMapping;
// /**
@@ -498,6 +509,7 @@ public class BinaryCasSerDes6 implements
* @param compressStrategy if not null, specifies enum instance for compress
strategy
* @throws ResourceInitializationException if the target type system is
incompatible with the source type system
*/
+
public BinaryCasSerDes6(
AbstractCas aCas,
MarkerImpl mark,
@@ -506,6 +518,19 @@ public class BinaryCasSerDes6 implements
boolean doMeasurements,
CompressLevel compressLevel,
CompressStrat compressStrategy) throws ResourceInitializationException {
+ this(aCas, mark, tgtTs, false, false, rfs, doMeasurements, compressLevel,
compressStrategy);
+ }
+
+ private BinaryCasSerDes6(
+ AbstractCas aCas,
+ MarkerImpl mark,
+ TypeSystemImpl tgtTs,
+ boolean storeTS,
+ boolean storeTSI,
+ ReuseInfo rfs,
+ boolean doMeasurements,
+ CompressLevel compressLevel,
+ CompressStrat compressStrategy) throws ResourceInitializationException {
cas = ((CASImpl) ((aCas instanceof JCas) ? ((JCas)aCas).getCas():
aCas)).getBaseCAS();
bcsd = cas.getBinaryCasSerDes();
@@ -522,6 +547,8 @@ public class BinaryCasSerDes6 implements
isDelta = isSerializingDelta = (mark != null);
typeMapper = srcTs.getTypeSystemMapper(tgtTs);
isTypeMapping = (null != typeMapper);
+ isTsIncluded = storeTS;
+ isTsiIncluded = storeTSI;
// heap = cas.getHeap().heap;
// heapEnd = cas.getHeap().getCellsUsed();
@@ -554,12 +581,55 @@ public class BinaryCasSerDes6 implements
}
/**
+ * only called to set up for deserialization.
+ * clones existing f6, but changes the tgtTs (used to decode)
+ * @param f6 -
+ * @param tgtTs used for decoding
+ * @throws ResourceInitializationException -
+ */
+ BinaryCasSerDes6(BinaryCasSerDes6 f6, TypeSystemImpl tgtTs) throws
ResourceInitializationException {
+ this.cas = f6.cas;
+ this.bcsd = f6.bcsd;
+ this.stringHeapObj = f6.stringHeapObj;
+ this.nextFsId = f6.nextFsId;
+
+ this.srcTs = f6.srcTs;
+ this.tgtTs = tgtTs; // passed in argument !
+ this.compressLevel = f6.compressLevel;
+ this.compressStrategy = f6.compressStrategy;
+
+ this.mark = f6.mark;
+ if (null != mark && !mark.isValid() ) {
+ throw new CASRuntimeException(
+ CASRuntimeException.INVALID_MARKER, "Invalid Marker.");
+ }
+
+ this.isDelta = this.isSerializingDelta = (mark != null);
+ this.fsStartIndexes = f6.fsStartIndexes;
+ this.reuseInfoProvided = f6.reuseInfoProvided;
+ this.doMeasurements = f6.doMeasurements;
+ this.sm = f6.sm;
+
+ this.isTsIncluded = f6.isTsIncluded;
+ this.isTsiIncluded = f6.isTsiIncluded;
+
+ this.typeMapper = srcTs.getTypeSystemMapper(tgtTs);
+ this.isTypeMapping = (null != typeMapper);
+ this.prevHeapInstanceWithIntValues = f6.prevHeapInstanceWithIntValues;
+ this.prevFsWithLongValues = f6.prevFsWithLongValues;
+ this.foundFSs = f6.foundFSs;
+ this.foundFSsBelowMark = f6.foundFSsBelowMark;
+ this.fssToSerialize = f6.fssToSerialize;
+
+ }
+
+ /**
* Setup to serialize (not delta) or deserialize (not delta) using binary
compression, no type mapping but only processing reachable Feature Structures
* @param cas -
* @throws ResourceInitializationException never thrown
*/
public BinaryCasSerDes6(AbstractCas cas) throws
ResourceInitializationException {
- this(cas, null, null, null, false, CompressLevel.Default,
CompressStrat.Default);
+ this(cas, null, null, false, false, null, false, CompressLevel.Default,
CompressStrat.Default);
}
/**
@@ -569,7 +639,7 @@ public class BinaryCasSerDes6 implements
* @throws ResourceInitializationException if the target type system is
incompatible with the source type system
*/
public BinaryCasSerDes6(AbstractCas cas, TypeSystemImpl tgtTs) throws
ResourceInitializationException {
- this(cas, null, tgtTs, null, false, CompressLevel.Default,
CompressStrat.Default);
+ this(cas, null, tgtTs, false, false, null, false, CompressLevel.Default,
CompressStrat.Default);
}
/**
@@ -581,7 +651,7 @@ public class BinaryCasSerDes6 implements
* @throws ResourceInitializationException if the target type system is
incompatible with the source type system
*/
public BinaryCasSerDes6(AbstractCas cas, MarkerImpl mark, TypeSystemImpl
tgtTs, ReuseInfo rfs) throws ResourceInitializationException {
- this(cas, mark, tgtTs, rfs, false, CompressLevel.Default,
CompressStrat.Default);
+ this(cas, mark, tgtTs, false, false, rfs, false, CompressLevel.Default,
CompressStrat.Default);
}
/**
@@ -594,7 +664,7 @@ public class BinaryCasSerDes6 implements
* @throws ResourceInitializationException if the target type system is
incompatible with the source type system
*/
public BinaryCasSerDes6(AbstractCas cas, MarkerImpl mark, TypeSystemImpl
tgtTs, ReuseInfo rfs, boolean doMeasurements) throws
ResourceInitializationException {
- this(cas, mark, tgtTs, rfs, doMeasurements, CompressLevel.Default,
CompressStrat.Default);
+ this(cas, mark, tgtTs, false, false, rfs, doMeasurements,
CompressLevel.Default, CompressStrat.Default);
}
/**
@@ -604,7 +674,19 @@ public class BinaryCasSerDes6 implements
* @throws ResourceInitializationException never thrown
*/
public BinaryCasSerDes6(AbstractCas cas, ReuseInfo rfs) throws
ResourceInitializationException {
- this(cas, null, null, rfs, false, CompressLevel.Default,
CompressStrat.Default);
+ this(cas, null, null, false, false, rfs, false, CompressLevel.Default,
CompressStrat.Default);
+ }
+
+ /**
+ * Setup to serialize (not delta) or deserialize (maybe delta) using binary
compression, no type mapping, optionally storing TSI, and only processing
reachable Feature Structures
+ * @param cas -
+ * @param rfs -
+ * @param storeTS -
+ * @param storeTSI -
+ * @throws ResourceInitializationException never thrown
+ */
+ public BinaryCasSerDes6(AbstractCas cas, ReuseInfo rfs, boolean storeTS,
boolean storeTSI) throws ResourceInitializationException {
+ this(cas, null, null, storeTS, storeTSI, rfs, false,
CompressLevel.Default, CompressStrat.Default);
}
/*********************************************************************************************
@@ -1592,7 +1674,7 @@ public class BinaryCasSerDes6 implements
* @throws IOException -
*/
public void deserialize(InputStream istream) throws IOException {
- readHeader(istream);
+ Header h = readHeader(istream); // side effect, sets deserIn
if (isReadingDelta) {
if (!reuseInfoProvided) {
@@ -1601,8 +1683,9 @@ public class BinaryCasSerDes6 implements
} else {
cas.resetNoQuestions();
}
-
- deserializeAfterVersion(deserIn, isReadingDelta, AllowPreexistingFS.allow);
+
+ bcsd.reinit(h, deserIn, null, CasLoadMode.DEFAULT, this,
AllowPreexistingFS.allow, null);
+// deserializeAfterVersion(deserIn, isReadingDelta,
AllowPreexistingFS.allow);
}
/**
@@ -1612,7 +1695,7 @@ public class BinaryCasSerDes6 implements
* @throws IOException passthru
*/
public void deserialize(InputStream istream, AllowPreexistingFS
allowPreexistingFS) throws IOException {
- readHeader(istream);
+ Header h = readHeader(istream);
if (isReadingDelta) {
if (!reuseInfoProvided) {
@@ -1622,7 +1705,7 @@ public class BinaryCasSerDes6 implements
throw new UnsupportedOperationException("Delta CAS required for this
call");
}
- deserializeAfterVersion(deserIn, isReadingDelta, allowPreexistingFS);
+ bcsd.reinit(h, deserIn, null, CasLoadMode.DEFAULT, this,
allowPreexistingFS, null);
}
@@ -3751,7 +3834,7 @@ public class BinaryCasSerDes6 implements
* @throws IOException passthru
*********************************************/
- private void readHeader(InputStream istream) throws IOException {
+ private Header readHeader(InputStream istream) throws IOException {
deserIn = CommonSerDes.maybeWrapToDataInputStream(istream);
Header h = CommonSerDes.readHeader(deserIn);
@@ -3765,9 +3848,7 @@ public class BinaryCasSerDes6 implements
}
isReadingDelta = isDelta = h.isDelta;
-
-
-
+ return h;
}
/* *******************************************
@@ -3843,6 +3924,10 @@ public class BinaryCasSerDes6 implements
return v;
}
+ TypeSystemImpl getTgtTs() {
+ return this.tgtTs;
+ }
+
// number of views: cas.getNumberOfViews()
// number of sofas cas.getNumberOfSofas()
// [sofa-1 ... sofa-n] cas.getSofaIterator()
Propchange:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:1690273-1757885
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
Sat Aug 27 14:29:59 2016
@@ -295,7 +295,7 @@ public class CASImpl extends AbstractCas
// A map from SofaNumbers which are also view numbers to IndexRepositories.
// these numbers are dense, and start with 1. 1 is the initial view. 0
is the base cas
- private ArrayList<FSIndexRepositoryImpl> sofa2indexMap;
+ ArrayList<FSIndexRepositoryImpl> sofa2indexMap;
/**
@@ -309,7 +309,7 @@ public class CASImpl extends AbstractCas
* However, the maximum view count is reset; so creation of new views
"reuses" these pre-setup indexRepos
* associated with these views.
*/
- private ArrayList<CASImpl> sofaNbr2ViewMap;
+ ArrayList<CASImpl> sofaNbr2ViewMap;
/**
* a set of instantiated sofaNames
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
Sat Aug 27 14:29:59 2016
@@ -42,6 +42,7 @@ import org.apache.uima.jcas.cas.LongArra
import org.apache.uima.jcas.cas.ShortArray;
import org.apache.uima.jcas.cas.StringArray;
import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.util.CasIOUtils;
/**
* This object has 2 purposes.
@@ -219,6 +220,10 @@ public class CASSerializer implements Se
dos.writeInt(shdh.refHeap[i +
StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET]);
}
}
+
+ void addTsiCAS(CASImpl cas, OutputStream ostream) {
+
+ }
/**
* Serializes the CAS data and writes it to the output stream.
@@ -249,6 +254,10 @@ public class CASSerializer implements Se
* @param ostream -
*/
public void addCAS(CASImpl cas, OutputStream ostream) {
+ addCAS(cas, ostream, false);
+ }
+
+ public void addCAS(CASImpl cas, OutputStream ostream, boolean includeTsi) {
final BinaryCasSerDes bcsd = cas.getBinaryCasSerDes();
final CommonSerDesSequential csds =
BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false); // saves the csds in the
cas, used for delta
@@ -264,9 +273,14 @@ public class CASSerializer implements Se
// output the key and version number
CommonSerDes.createHeader()
.seqVer(1) // 0 original, 1 UIMA-4743
+ .typeSystemIndexDefIncluded(includeTsi)
.v3()
.write(dos);
-
+
+ if (includeTsi) {
+ CasIOUtils.writeTypeSystem(cas, ostream, true);
+ }
+
// output the FS heap
final int heapSize = bcsd.heap.getCellsUsed();
dos.writeInt(heapSize);
@@ -536,7 +550,7 @@ public class CASSerializer implements Se
writeMods(chgByteAddr, dos, i -> dos.writeByte(chgByteValues.heap[i]));
// word alignment
- align = (4 - (byteheapsz % 4)) % 4;
+ align = (4 - (chgByteAddr.size() % 4)) % 4;
for (int i = 0; i < align; i++) {
dos.writeByte(0);
}
@@ -545,7 +559,7 @@ public class CASSerializer implements Se
writeMods(chgShortAddr, dos, i ->
dos.writeShort(chgShortValues.heap[i]));
// word alignment
- if (shortheapsz % 2 != 0) {
+ if (chgShortAddr.size() % 2 != 0) {
dos.writeShort(0);
}
Propchange:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:1690273-1757907
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
Sat Aug 27 14:29:59 2016
@@ -26,6 +26,8 @@ import java.io.InputStream;
import java.io.OutputStream;
import java.nio.ByteBuffer;
+import org.apache.uima.UIMARuntimeException;
+
/**
* Common de/serialization
*/
@@ -48,6 +50,7 @@ public class CommonSerDes {
* - bit in 0x01 position: on for binary non-delta (redundant)
* - bit in 0x02 position: on means delta, off - not delta
* - bit in 0x04 position: on means compressed, off means plain binary
+ * - bit in 0x08 position: on means type system included
* - bits 0xF8 reserved
*
* - byte in 0xFF 00 position: incrementing (starting w/ 0) version
@@ -63,12 +66,14 @@ public class CommonSerDes {
* - bit in 0x01 position: on means form6, off = form 4
*********************************************/
- static class Header {
+ public static class Header {
boolean isDelta;
boolean isCompressed;
boolean isV3style;
boolean form4;
boolean form6;
+ boolean typeSystemIncluded; // for form 6, TS only
+ boolean typeSystemIndexDefIncluded;
byte seqVersionNbr;
boolean isV3;
boolean swap;
@@ -77,18 +82,21 @@ public class CommonSerDes {
Reading reading;
- Header delta() {isDelta = true; return this; }
- Header delta(boolean v2) {isDelta = v2; return this; }
- Header form4() {isCompressed = form4 = true; form6 = false; return this; }
- Header form6() {isCompressed = form6 = true; form4 = false; return this; }
- Header seqVer(int v2) { assert (v2 >= 0 && v2 < 256); seqVersionNbr =
(byte)v2; return this; }
- Header v3() {isV3 = true; return this; }
+ public Header delta() {isDelta = true; return this; }
+ public Header delta(boolean v2) {isDelta = v2; return this; }
+ public Header form4() {isCompressed = form4 = true; form6 = false; return
this; }
+ public Header form6() {isCompressed = form6 = true; form4 = false; return
this; }
+ public Header typeSystemIncluded(boolean f) {typeSystemIncluded = f;
return this; }
+ public Header typeSystemIndexDefIncluded(boolean f)
{typeSystemIndexDefIncluded = f; return this; }
+ public Header seqVer(int v2) { assert (v2 >= 0 && v2 < 256); seqVersionNbr
= (byte)v2; return this; }
+ public Header v3() {isV3 = true; return this; }
- void write(DataOutputStream dos) throws IOException {
+ public void write(DataOutputStream dos) throws IOException {
v = (!isCompressed && !isDelta) ? 1 : 0;
if (isDelta) v |= 0x02;
if (isCompressed) v |= 0x04;
+ if (typeSystemIndexDefIncluded) v |= 0x08;
v |= (seqVersionNbr << 8);
if (isV3) v |= 0x010000;
@@ -107,15 +115,66 @@ public class CommonSerDes {
if (isCompressed) {
dos.writeInt(form6 ? 1 : 0);
}
+
+ }
+
+ public boolean isDelta() {
+ return isDelta;
+ }
+ public boolean isCompressed() {
+ return isCompressed;
}
+ public boolean isV3style() {
+ return isV3style;
+ }
+ public boolean isForm4() {
+ return form4;
+ }
+ public boolean isForm6() {
+ return form6;
+ }
+ public boolean isTypeSystemIndexDefIncluded() {
+ return typeSystemIndexDefIncluded;
+ }
+ public boolean isTypeSystemIncluded() {
+ return typeSystemIncluded;
+ }
+ public byte getSeqVersionNbr() {
+ return seqVersionNbr;
+ }
+ public boolean isV3() {
+ return isV3;
+ }
+
+
}
- static Header createHeader() {
+ public static Header createHeader() {
return new Header();
}
+ public static boolean isBinaryHeader(DataInputStream dis) {
+ dis.mark(4);
+ byte[] bytebuf = new byte[4];
+ try {
+ bytebuf[0] = dis.readByte(); // U
+ bytebuf[1] = dis.readByte(); // I
+ bytebuf[2] = dis.readByte(); // M
+ bytebuf[3] = dis.readByte(); // A
+ String s = new String(bytebuf, "UTF-8");
+ return s.equals("UIMA") || s.equals("AMIU");
+ } catch (IOException e) {
+ return false;
+ } finally {
+ try {
+ dis.reset();
+ } catch (IOException e) {
+ throw new UIMARuntimeException(e);
+ }
+ }
+ }
- static Header readHeader(DataInputStream dis) throws IOException {
+ public static Header readHeader(DataInputStream dis) throws IOException {
Header h = new Header();
// key
@@ -134,6 +193,7 @@ public class CommonSerDes {
h.isDelta = (v & 2) != 0;
h.isCompressed = (v & 4) != 0;
+ h.typeSystemIndexDefIncluded = (v & 8) != 0;
h.seqVersionNbr = (byte) ((v & 0xFF00) >> 8);
if (h.isCompressed) {
@@ -145,25 +205,25 @@ public class CommonSerDes {
return h;
}
- static DataOutputStream maybeWrapToDataOutputStream(OutputStream os) {
+ public static DataOutputStream maybeWrapToDataOutputStream(OutputStream os) {
if (os instanceof DataOutputStream) {
return (DataOutputStream) os;
}
return new DataOutputStream(os);
}
- static DataInputStream maybeWrapToDataInputStream(InputStream is) {
- if (is instanceof DataInputStream) {
- return (DataInputStream) is;
+ public static DataInputStream maybeWrapToDataInputStream(InputStream os) {
+ if (os instanceof DataInputStream) {
+ return (DataInputStream) os;
}
- return new DataInputStream(is);
+ return new DataInputStream(os);
}
/**
* byte swapping reads of integer forms
*/
- static class Reading {
+ public static class Reading {
final DataInputStream dis;
final boolean swap;
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
Sat Aug 27 14:29:59 2016
@@ -42,7 +42,10 @@ import org.apache.uima.resource.Resource
* - one which makes use of various custom binary serialization methods, and
* - one which just converts CAS and related objects into other objects which
* in turn are serializable by normal Java Object serialization.
- *
+ *
+ * See also CasIOUtils, which has static methods for serialization and
deserialization, including
+ * support for XMI and XCAS.
+ *
*/
public class Serialization {
@@ -92,6 +95,19 @@ public class Serialization {
.getBaseIndexRepository());
return ser;
}
+
+ /**
+ * Convert a Type System into a
+ * CASMgrSerializer object which can be serialized
+ *
+ * @param casMgr the type system and index repo definitions
+ * @return a serializable object version of these
+ */
+ public static CASMgrSerializer serializeCASMgrTypeSystemOnly(CASMgr casMgr) {
+ CASMgrSerializer ser = new CASMgrSerializer();
+ ser.addTypeSystem((TypeSystemImpl) casMgr.getCAS().getTypeSystem());
+ return ser;
+ }
/**
* Convert a CAS + the type system and index definitions into a
@@ -213,6 +229,26 @@ public class Serialization {
}
/**
+ * Serialize in compressed binary with type filtering
+ * This method can use type filtering to omit sending those types and/or
features not present in the target type system.
+ * - To omit type filtering, use null for the target type system
+ * It also only sends those feature structures which are reachable either
from an index or references from other reachable feature structures.
+ *
+ * @param cas the CAS to serialize
+ * @param out an OutputStream, a DataOutputStream, or a File
+ * @param includeTS true to serialize the type system
+ * @param includeTSI true to serialize the type system and the indexes
definition
+ * @return information to be used on subsequent serializations (to save
time) or deserializations (for receiving delta CASs), or reserializations (if
sending delta CASs)
+ * @throws IOException if IO exception
+ * @throws ResourceInitializationException if target type system is
incompatible with this CAS's type system
+ */
+ public static ReuseInfo serializeWithCompression(CAS cas, Object out,
boolean includeTS, boolean includeTSI) throws IOException,
ResourceInitializationException {
+ BinaryCasSerDes6 bcs = new BinaryCasSerDes6(cas, null, includeTS,
includeTSI);
+ bcs.serialize(out);
+ return bcs.getReuseInfo();
+ }
+
+ /**
* Serialize in compressed binary with type filtering
* This method can use type filtering to omit sending those types and/or
features not present in the target type system.
* - To omit type filtering, use null for the target type system
Propchange:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:1690273-1757906
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
Sat Aug 27 14:29:59 2016
@@ -91,7 +91,7 @@ public class XmiCasDeserializer {
private static final String ID_ATTR_NAME = "xmi:id";
- private class XmiCasDeserializerHandler extends DefaultHandler {
+ public class XmiCasDeserializerHandler extends DefaultHandler {
// ///////////////////////////////////////////////////////////////////////
// Internal states for the parser.
Added:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java?rev=1758045&view=auto
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java
(added)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java
Sat Aug 27 14:29:59 2016
@@ -0,0 +1,542 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.util;
+
+import static org.apache.uima.cas.impl.Serialization.serializeCAS;
+import static org.apache.uima.cas.impl.Serialization.serializeWithCompression;
+
+import java.io.BufferedInputStream;
+import java.io.Closeable;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.util.Arrays;
+
+import org.apache.uima.UIMARuntimeException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASRuntimeException;
+import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.admin.CASMgr;
+import org.apache.uima.cas.impl.AllowPreexistingFS;
+import org.apache.uima.cas.impl.BinaryCasSerDes;
+import org.apache.uima.cas.impl.BinaryCasSerDes4;
+import org.apache.uima.cas.impl.CASCompleteSerializer;
+import org.apache.uima.cas.impl.CASImpl;
+import org.apache.uima.cas.impl.CASMgrSerializer;
+import org.apache.uima.cas.impl.CASSerializer;
+import org.apache.uima.cas.impl.CommonSerDes;
+import org.apache.uima.cas.impl.CommonSerDes.Header;
+import org.apache.uima.cas.impl.Serialization;
+import org.apache.uima.cas.impl.TypeSystemImpl;
+import org.apache.uima.cas.impl.XCASSerializer;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.xml.sax.SAXException;
+
+/**
+ * <p>A collection of static methods aimed at making it easy to</p>
+ * <ul>
+ * <li>save and load CASes, and to</li>
+ * <li>optionally include the CAS's Type System (abbreviated TS (only
available for Compressed Form 6)) and optionally also include the CAS's indexes
definition.</li>
+ * <li>The combination of Type System and Indexes definition is called TSI.
+ * <ul>
+ * <li>The TSI's purpose: to replace the CAS's existing type system and
index definition.</li>
+ * <li>The TS's purpose: to specify the type system used in the
serialized data for format Compressed Form 6, in order to allow deserializing
into some other type system in the CAS, leniently.</li>
+ * </ul>
+ * </li>
+ * </ul>
+ *
+ * <p>TSI information can be</p>
+ * <ul>
+ * <li>embedded</li>
+ * <li>externally supplied (via another input source to the load)</li>
+ * <li>both embedded and externally supplied. In this case the
embedded takes precedence.</li>
+ * </ul>
+ *
+ * <p>TS information is available embedded, for COMPRESSED_FILTERED_TS format,
+ * and also from embedded or external TSI information (since it also
contains the type system information).</p>
+ *
+ * <p>When an external TSI is supplied while loading Compressed Form 6,</p>
+ * <ul>
+ * <li>for COMPRESSED_FILTERED_TS
+ * <ul>
+ * <li>it uses the embedded TS for decoding</li>
+ * <li>it uses the external TSI to replace the CAS's existing type
system and index definition if CasLoadMode == REINIT.</li>
+ * </ul>
+ * </li>
+ * <li>for COMPRESSED_FILTERED_TSI
+ * <ul>
+ * <li>the external TSI is ignored, the embedded one overrides, but
otherwise operates as above.</li>
+ * </ul>
+ * </li>
+ * <li>for COMPRESSED_FILTERED
+ * <ul>
+ * <li>the external TSI's type system part is used for decoding.</li>
+ * <li>if CasLoadMode == REINIT, the external TSI is also used to
replace the CAS's existing type system and index definition.</li>
+ * </ul>
+ * </li>
+ * </ul>
+ *
+ * <p>Compressed Form 6 loading decoding type system is picked from these
sources, in this order:</p>
+ * <ul>
+ * <li>a passed in type system</li>
+ * <li>an embedded TS or TSI</li>
+ * <li>an external TSI</li>
+ * <li>the CAS's type system</li>
+ * </ul>
+ *
+ * <p>The serialization formats supported here are specified in the
SerialFormat enum.</p>
+ *
+ * <p>The <code>load </code>api's automatically use the appropriate
deserializers, based on the input data format.</p>
+ *
+ * <p>Loading inputs may be supplied as URLs or as an appropriately buffered
InputStream.</p>
+ *
+ * <p>Note: you can use Files or Paths by converting these to URLs:</p>
+ * <ul>
+ * <li><code>URL url = a_path.toUri().toURL();</code></li>
+ * <li><code>URL url = a_file.toURI().toURL();</code></li>
+ * </ul>
+ *
+ * <p>When loading, an optional CasLoadMode enum value may be specified to
indicate</p>
+ * <ul>
+ * <li>LENIENT loading - used with XCas and XMI data sources to
silently ignore types and features present in the serialized form, but not in
the receiving type system.</li>
+ * <li>REINIT - used with Compressed Form 6 loading to indicate that
if no embedded TSI information is available, the external TSI is to be used to
replace the CAS's existing type system and index definition.</li>
+ * </ul>
+ *
+ * <p style="padding-left: 30px;">For more details, see the Javadocs for
CasLoadMode.</p>
+ *
+ * <p>When TS or TSI information is saved, it is either saved in the same
destination (e.g. file or stream), or in a separate one.</p>
+ * <ul>
+ * <li>The serialization formats ending in _TSI and _TS support saving the
TSI (or TS) in the same destination.</li>
+ * <li>The save APIs for other formats can optionally also save the TSI into
a separate (second) OutputStream.</li>
+ * </ul>
+ *
+ * <p>Summary of APIs for saving:</p>
+ * <pre style="padding-left: 30px;">
+ * <code>save(aCAS, outputStream, aSerialFormat)</code>
+ * <code>save(aCAS, outputStream, tsiOutputStream,
aSerialFormat)</code></pre>
+ *
+ * <p>Summary of APIs for loading:</p>
+ * <pre style="padding-left: 30px;">
+ * <code>load(aURL , aCas)</code>
+ * <code>load(inputStream, aCas)</code>
+ * <code>load(inputStream, aCas, typeSystem)</code> // typeSystem used for
decoding Compressed Form 6
+ * <code>load(inputStream, tsiInputStream, aCas)</code></pre>
+ * <pre style="padding-left: 30px;">
+ * <code>load(aURL ,
tsiURL , aCAS,
casLoadMode) - the second URL is for loading a separately-stored
TSI</code>
+ * <code>load(inputStream, tsiInputStream, aCAS, aCasLoadMode)</code>
+ * <code>load(aURL ,
tsiURL , aCAS,
lenient) - lenient is used to set the
CasLoadMode to LENIENT or DEFAULT</code>
+ * <code>load(inputStream, tsiInputStream, aCAS, lenient)</code></pre>
+ */
+
+public class CasIOUtils {
+
+ /**
+ * Loads a Cas from a URL source.
+ * For SerialFormats ending with _TSI except for COMPRESSED_FILTERED_TSI,
+ * the CAS's type system and indexes definition are replaced.
+ * CasLoadMode is DEFAULT.
+ *
+ * @param casUrl
+ * The url containing the CAS
+ * @param aCAS
+ * The CAS that should be filled
+ * @return the SerialFormat of the loaded CAS
+ * @throws IOException
+ * - Problem loading from given URL
+ */
+ public static SerialFormat load(URL casUrl, CAS aCAS) throws IOException {
+
+ return load(casUrl, null, aCAS, CasLoadMode.DEFAULT);
+ }
+
+ /**
+ * Loads a CAS from a URL source. The format is determined from the content.
+ *
+ * If the value of tsiUrl is null it is ignored.
+ *
+ * @param casUrl
+ * The url to deserialize the CAS from
+ * @param tsiUrl
+ * null or an optional url to deserialize the type system and index
definitions from
+ * @param aCAS
+ * The CAS that should be filled
+ * @param casLoadMode specifies how to handle reinitialization and lenient
loading
+ * see the Javadocs for CasLoadMode
+ * @return the SerialFormat of the loaded CAS
+ * @throws IOException Problem loading
+ */
+ public static SerialFormat load(URL casUrl, URL tsiUrl, CAS aCAS,
CasLoadMode casLoadMode)
+ throws IOException {
+ InputStream casIS = new BufferedInputStream(casUrl.openStream());
+ InputStream tsIS = (tsiUrl == null) ? null : new
BufferedInputStream(tsiUrl.openStream());
+ try {
+ return load(casIS, tsIS, aCAS, casLoadMode);
+ } finally {
+ closeQuitely(casIS);
+ closeQuitely(tsIS);
+ }
+ }
+
+ /**
+ * Loads a CAS from a URL source. The format is determined from the content.
+ * For SerialFormats ending with _TSI except for COMPRESSED_FILTERED_TSI,
+ * the CAS's type system and indexes definition are replaced.
+ * CasLoadMode is set according to the leniently flag.
+ *
+ * @param casUrl
+ * The url to deserialize the CAS from
+ * @param tsiUrl
+ * The optional url to deserialize the type system and index
definitions from
+ * @param aCAS
+ * The CAS that should be filled
+ * @param leniently true means do lenient loading
+ * @return the SerialFormat of the loaded CAS
+ * @throws IOException Problem loading
+ */
+ public static SerialFormat load(URL casUrl, URL tsiUrl, CAS aCAS, boolean
leniently)
+ throws IOException {
+ return load(casUrl, tsiUrl, aCAS, leniently ? CasLoadMode.LENIENT :
CasLoadMode.DEFAULT);
+ }
+
+ /**
+ * Loads a Cas from an Input Stream. The format is determined from the
content.
+ * For SerialFormats ending with _TSI except for COMPRESSED_FILTERED_TSI,
+ * the CAS's type system and indexes definition are replaced.
+ * CasLoadMode is DEFAULT.
+ *
+ * @param casInputStream
+ * The input stream containing the CAS. Caller should buffer this
appropriately.
+ * @param aCAS
+ * The CAS that should be filled
+ * @return the SerialFormat of the loaded CAS
+ * @throws IOException
+ * - Problem loading from given InputStream
+ */
+ public static SerialFormat load(InputStream casInputStream, CAS aCAS) throws
IOException {
+ return load(casInputStream, null, aCAS, CasLoadMode.DEFAULT);
+ }
+
+ /**
+ * Loads a CAS from an Input Stream. The format is determined from the
content.
+ *
+ * For SerialFormats ending with _TSI the embedded value is used instead of
any supplied external TSI information.
+ * TSI information is available either via embedded value, or if a non-null
input is passed for tsiInputStream.
+ *
+ * If TSI information is available, the CAS's type system and indexes
definition are replaced,
+ * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and
COMPRESSED_FILTERED_TSI.
+ *
+ * The CasLoadMode is DEFAULT.
+ *
+ * @param casInputStream -
+ * @param tsiInputStream -
+ * @param aCAS -
+ * @return -
+ * @throws IOException -
+ */
+ public static SerialFormat load(InputStream casInputStream, InputStream
tsiInputStream, CAS aCAS) throws IOException {
+ return load(casInputStream, tsiInputStream, aCAS, CasLoadMode.DEFAULT);
+ }
+
+ /**
+ * Loads a CAS from an Input Stream. The format is determined from the
content.
+ *
+ * For SerialFormats ending with _TSI the embedded value is used instead of
any supplied external TSI information.
+ * TSI information is available either via embedded value, or if a non-null
input is passed for tsiInputStream.
+ *
+ * If TSI information is available, the CAS's type system and indexes
definition are replaced,
+ * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and
COMPRESSED_FILTERED_TSI.
+ *
+ * The CasLoadMode is set to LENIENT if the leniently flag is true;
otherwise it is set to DEFAULT.
+ *
+ * @param casInputStream -
+ * @param tsiInputStream -
+ * @param aCAS -
+ * @param leniently -
+ * @return -
+ * @throws IOException -
+ */
+ public static SerialFormat load(InputStream casInputStream, InputStream
tsiInputStream, CAS aCAS, boolean leniently) throws IOException {
+ return load(casInputStream, tsiInputStream, aCAS, leniently ?
CasLoadMode.LENIENT : CasLoadMode.DEFAULT);
+ }
+
+ /**
+ * Loads a CAS from an Input Stream. The format is determined from the
content.
+ * For formats ending in _TSI (SERIALIZED_TSI or COMPRESSED_FILTERED_TSI),
+ * the type system and index definitions are read from the cas input source;
+ * the value of tsiInputStream is ignored.
+ *
+ * For other formats, if the tsiInputStream is not null,
+ * type system and index definitions are read from that source.
+ *
+ * If TSI information is available, the CAS's type system and indexes
definition are replaced,
+ * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and
COMPRESSED_FILTERED_TSI.
+ *
+ * If the CasLoadMode == REINIT, then the TSI information is also used for
these 3 formats to replace the CAS's definitions.
+ *
+ * @param casInputStream
+ * The input stream containing the CAS, appropriately buffered.
+ * @param tsiInputStream
+ * The optional input stream containing the type system,
appropriately buffered.
+ * This is only used if it is non null and
+ * - the casInputStream does not already come with an embedded
CAS Type System and Index Definition, or
+ * - the serial format is COMPRESSED_FILTERED_TSI
+ * @param aCAS
+ * The CAS that should be filled
+ * @param casLoadMode specifies loading alternatives such as lenient and reinit,
see CasLoadMode.
+ * @return the SerialFormat of the loaded CAS
+ * @throws IOException
+ * - Problem loading from given InputStream
+ */
+ public static SerialFormat load(InputStream casInputStream, InputStream
tsiInputStream, CAS aCAS,
+ CasLoadMode casLoadMode) throws IOException {
+ return load(casInputStream, tsiInputStream, aCAS, casLoadMode, null);
+ }
+
+ /**
+ * This load variant can be used for loading Form 6 compressed CASes where
the
+ * type system to use to deserialize is provided as an argument. It can
also load other formats,
+ * where its behavior is identical to load(casInputStream, aCas).
+ *
+ * Loads a CAS from an Input Stream. The format is determined from the
content.
+ * For SerialFormats ending in _TSI (SERIALIZED_TSI or
COMPRESSED_FILTERED_TSI),
+ * the type system and index definitions are read from the cas input source;
+ * the value of typeSystem is ignored.
+ *
+ * For COMPRESSED_FILTERED_xxx formats, if the typeSystem is not null,
+ * the typeSystem is used for decoding.
+ *
+ * If embedded TSI information is available, the CAS's type system and
indexes definition are replaced,
+ * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and
COMPRESSED_FILTERED_TSI.
+ *
+ * To replace the CAS's type system and indexes definition for these, use
a load form which
+ * has the CasLoadMode argument, and set this to REINIT.
+ *
+ * @param casInputStream
+ * The input stream containing the CAS, appropriately buffered.
+ * @param aCAS
+ * The CAS that should be filled
+ * @param typeSystem the type system to use for decoding the serialized
form, must be non-null
+ * @return the SerialFormat of the loaded CAS
+ * @throws IOException Problem loading from given InputStream
+ */
+ public static SerialFormat load(InputStream casInputStream, CAS aCAS,
TypeSystem typeSystem) throws IOException {
+ return load(casInputStream, null, aCAS, CasLoadMode.DEFAULT,
(TypeSystemImpl) typeSystem);
+ }
+
+ /**
+  * Common implementation behind the public load variants. Sniffs the serialized format from the
+  * first bytes of the stream and dispatches to the XML (XMI / XCAS), binary / compressed binary,
+  * or Java-object deserialization path.
+  *
+  * @param casInputStream
+  *          the stream holding the serialized CAS; it is wrapped in a BufferedInputStream when it
+  *          does not support mark/reset
+  * @param tsiInputStream
+  *          optional (may be null) stream holding a Java-serialized CASMgrSerializer
+  * @param aCAS
+  *          the CAS to fill; must be a CASImpl
+  * @param casLoadMode
+  *          LENIENT turns on lenient XML loading; the mode is also handed to the binary reinit
+  * @param typeSystem
+  *          optional (may be null) type system, forwarded to the binary reinit
+  * @return the SerialFormat that was detected and loaded
+  * @throws IOException
+  *           - Problem loading from given InputStream
+  */
+ private static SerialFormat load(InputStream casInputStream, InputStream tsiInputStream, CAS aCAS,
+     CasLoadMode casLoadMode, TypeSystemImpl typeSystem) throws IOException {
+
+   if (!casInputStream.markSupported()) {
+     casInputStream = new BufferedInputStream(casInputStream);
+   }
+
+   CASImpl casImpl = (CASImpl) aCAS;
+   BinaryCasSerDes bcsd = casImpl.getBinaryCasSerDes();
+
+   // scan the first part of the file for known formats
+   final int prefixLength = 6; // length of "<?xml "
+   casInputStream.mark(prefixLength);
+   byte[] firstPartOfFile = new byte[prefixLength];
+   int bytesReadCount = 0;
+   // A single read() may deliver fewer bytes than requested and returns -1 at end of stream;
+   // loop so a short read cannot hide an XML prolog and an empty stream cannot yield a
+   // negative length for the String constructor below.
+   while (bytesReadCount < prefixLength) {
+     int n = casInputStream.read(firstPartOfFile, bytesReadCount, prefixLength - bytesReadCount);
+     if (n < 0) {
+       break;
+     }
+     bytesReadCount += n;
+   }
+   casInputStream.reset();
+   String start = new String(firstPartOfFile, 0, bytesReadCount, "UTF-8").toLowerCase();
+
+   if (start.startsWith("<?xml ")) { // could be XCAS or XMI
+     try {
+       bcsd.setupCasFromCasMgrSerializer(readCasManager(tsiInputStream));
+       // next call decides on XMI or XCAS via content
+       return XmlCasDeserializer.deserializeR(casInputStream, aCAS, casLoadMode == CasLoadMode.LENIENT);
+     } catch (SAXException e) {
+       throw new UIMARuntimeException(e);
+     }
+   }
+
+   // Not an XML file, decode as binary file
+   DataInputStream deserIn = CommonSerDes.maybeWrapToDataInputStream(casInputStream);
+   if (CommonSerDes.isBinaryHeader(deserIn)) {
+
+     /*******************************************
+      * Binary, Compressed Binary (form 4 or 6)
+      ******************************************/
+     Header h = CommonSerDes.readHeader(deserIn);
+     // Forward the caller-supplied type system; previously null was passed here, which
+     // silently discarded the typeSystem argument of load(InputStream, CAS, TypeSystem).
+     return bcsd.reinit(h, casInputStream, readCasManager(tsiInputStream), casLoadMode, null,
+         AllowPreexistingFS.allow, typeSystem);
+
+   } else {
+
+     /******************************
+      * Java Object loading
+      ******************************/
+     // NOTE(review): Java-native deserialization; only safe for streams from trusted sources.
+     ObjectInputStream ois = new ObjectInputStream(casInputStream);
+     try {
+       Object o = ois.readObject();
+       if (o instanceof CASSerializer) {
+         bcsd.setupCasFromCasMgrSerializer(readCasManager(tsiInputStream));
+         bcsd.reinit((CASSerializer) o); // deserialize from object
+         return SerialFormat.SERIALIZED;
+       } else if (o instanceof CASCompleteSerializer) {
+         // with a type system use that, ignore any supplied via tsiInputStream
+         bcsd.reinit((CASCompleteSerializer) o);
+         return SerialFormat.SERIALIZED_TSI;
+       } else {
+         /**Unrecognized serialized CAS format*/
+         throw new CASRuntimeException(CASRuntimeException.UNRECOGNIZED_SERIALIZED_CAS_FORMAT);
+       }
+     } catch (ClassNotFoundException e) {
+       /**Unrecognized serialized CAS format*/
+       throw new CASRuntimeException(CASRuntimeException.UNRECOGNIZED_SERIALIZED_CAS_FORMAT);
+     }
+   }
+ }
+
+ /**
+  * Write the CAS in the specified format. No separate type system output stream is used:
+  * this delegates to the 4-argument save with a null tsiOS.
+  *
+  * @param aCas
+  *          The CAS that should be serialized and stored
+  * @param docOS
+  *          The output stream for the CAS
+  * @param format
+  *          The SerialFormat in which the CAS should be stored.
+  * @throws IOException
+  *           - Problem saving to the given OutputStream
+  */
+ public static void save(CAS aCas, OutputStream docOS, SerialFormat format) throws IOException {
+   save(aCas, docOS, null, format);
+ }
+
+ /**
+  * Write the CAS in the specified format. If the format does not include typesystem information
+  * and the optional output stream of the typesystem is specified, then the typesystem information
+  * (type system plus index definitions) is written there.
+  *
+  * @param aCas
+  *          The CAS that should be serialized and stored
+  * @param docOS
+  *          The output stream for the CAS, with appropriate buffering
+  * @param tsiOS
+  *          Optional output stream for type system information. Only used if the format does not
+  *          support storing typesystem information directly in the main output file.
+  * @param format
+  *          The SerialFormat in which the CAS should be stored.
+  * @throws IOException
+  *           - Problem saving to the given OutputStream; any other exception raised while
+  *           serializing is wrapped in an IOException
+  */
+ public static void save(CAS aCas, OutputStream docOS, OutputStream tsiOS, SerialFormat format)
+     throws IOException {
+   boolean typeSystemWritten = false;
+   try {
+     switch (format) {
+       case XMI:
+         XmiCasSerializer.serialize(aCas, docOS);
+         break;
+       case XCAS:
+         XCASSerializer.serialize(aCas, docOS, true); // true = formatted output
+         break;
+       case SERIALIZED: // Java-serialized CAS without type system
+         writeJavaObject(Serialization.serializeCAS(aCas), docOS);
+         break;
+       case SERIALIZED_TSI: // Java-serialized CAS with embedded type system
+         writeJavaObject(Serialization.serializeCASComplete((CASMgr) aCas), docOS);
+         typeSystemWritten = true; // Embedded type system
+         break;
+       case BINARY: // Binary CAS without type system
+         serializeCAS(aCas, docOS);
+         break;
+       case BINARY_TSI: // Binary CAS with embedded type system and index definitions
+         CASSerializer ser = new CASSerializer();
+         ser.addCAS((CASImpl) aCas, docOS, true);
+         // The TSI is already in the main stream; do not write it a second time to tsiOS.
+         typeSystemWritten = true; // Embedded type system
+         break;
+       case COMPRESSED: // Binary compressed CAS without type system (form 4)
+         serializeWithCompression(aCas, docOS);
+         break;
+       case COMPRESSED_TSI: // Binary compressed CAS with embedded type system (form 4)
+         new BinaryCasSerDes4((TypeSystemImpl) aCas.getTypeSystem(), false)
+             .serializeWithTsi((CASImpl) aCas, docOS);
+         // The TSI is already in the main stream; do not write it a second time to tsiOS.
+         typeSystemWritten = true; // Embedded type system
+         break;
+       case COMPRESSED_FILTERED: // Binary compressed CAS (form 6)
+         serializeWithCompression(aCas, docOS, false, false);
+         break;
+       case COMPRESSED_FILTERED_TS:
+         serializeWithCompression(aCas, docOS, true, false);
+         typeSystemWritten = true; // Embedded type system
+         break;
+       case COMPRESSED_FILTERED_TSI:
+         serializeWithCompression(aCas, docOS, false, true);
+         typeSystemWritten = true; // Embedded type system
+         break;
+       default:
+         // Only reachable if a SerialFormat constant is added without a case above.
+         StringBuilder sb = new StringBuilder();
+         for (SerialFormat sf : SerialFormat.values()) {
+           sb.append(sf.toString()).append(", ");
+         }
+         throw new IllegalArgumentException("Unknown format [" + format.name()
+             + "]. Must be one of: " + sb.toString());
+     }
+   } catch (IOException e) {
+     throw e;
+   } catch (Exception e) {
+     throw new IOException(e);
+   }
+
+   // Write type system to the separate stream only if it has not already been embedded into the
+   // main stream
+   if (tsiOS != null && !typeSystemWritten) {
+     writeTypeSystem(aCas, tsiOS, true);
+   }
+ }
+
+ /**
+  * Reads a Java-serialized CASMgrSerializer from the given stream.
+  *
+  * @param tsiInputStream the stream to read from; may be null
+  * @return the deserialized object, or null when no stream was supplied
+  * @throws IOException on a read failure, or wrapping a ClassNotFoundException
+  */
+ private static CASMgrSerializer readCasManager(InputStream tsiInputStream) throws IOException {
+   if (tsiInputStream == null) {
+     return null; // nothing supplied by the caller
+   }
+   try {
+     ObjectInputStream objectIn = new ObjectInputStream(tsiInputStream);
+     return (CASMgrSerializer) objectIn.readObject();
+   } catch (ClassNotFoundException notFound) {
+     throw new IOException(notFound);
+   }
+ }
+
+ /**
+  * Java-serializes one object onto the given stream and flushes it. The stream is not closed here.
+  *
+  * @param o the object to write
+  * @param aOS the destination stream
+  * @throws IOException if writing fails
+  */
+ private static void writeJavaObject(Object o, OutputStream aOS) throws IOException {
+   ObjectOutputStream objectOut = new ObjectOutputStream(aOS);
+   objectOut.writeObject(o);
+   objectOut.flush();
+ }
+
+ /**
+  * Java-serializes the CAS's type system onto the given stream, with or without index definitions.
+  *
+  * @param aCas the CAS whose definitions are written; must be a CASImpl
+  * @param aOS the destination stream
+  * @param includeIndexDefs true to serialize via serializeCASMgr, false to serialize via
+  *          serializeCASMgrTypeSystemOnly
+  * @throws IOException if writing fails
+  */
+ public static void writeTypeSystem(CAS aCas, OutputStream aOS, boolean includeIndexDefs) throws IOException {
+   Object serialized;
+   if (includeIndexDefs) {
+     serialized = Serialization.serializeCASMgr((CASImpl) aCas);
+   } else {
+     serialized = Serialization.serializeCASMgrTypeSystemOnly((CASImpl) aCas);
+   }
+   writeJavaObject(serialized, aOS);
+ }
+
+ /**
+  * Best-effort close: does nothing for null, and deliberately ignores an IOException from close().
+  *
+  * @param closeable the resource to close; may be null
+  */
+ private static void closeQuitely(Closeable closeable) {
+   if (closeable == null) {
+     return;
+   }
+   try {
+     closeable.close();
+   } catch (IOException ignored) {
+     // do nothing
+   }
+ }
+
+}
Added:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java?rev=1758045&view=auto
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java
(added)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java
Sat Aug 27 14:29:59 2016
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied. See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.util;
+
+/**
+ * Used with CasIOUtils, maybe elsewhere, to indicate how CASes are to be loaded or saved.
+ *
+ * TSI = serialized type system and index definitions
+ * TS = serialized type system
+ *
+ * TSI can be used to reinitialize the CAS's type system and its index definitions.
+ * TS (which can be obtained from TSI) is used only with Compressed form 6
+ * to specify the type system used to decode the serialized data.
+ *
+ * The TS/TSI artifact is self-identifying as to which kind it is, when deserializing.
+ *
+ * TSI and TS can be provided via two sources:
+ *   - embedded in some serialized forms
+ *   - via a separate artifact
+ *
+ * If both embedded and separate values are available for TS or TSI, then embedded takes precedence, external is ignored,
+ * except for compressed form 6; in that case, both are used:
+ *   - external used to reinitialize the CAS's type system and indexes definition, and
+ *   - embedded used to decode the serialized data, leniently.
+ *
+ * Compressed form 6 type system for decoding comes from the first one available of:
+ *   - embedded TS or TSI
+ *   - external TS or TSI
+ *   - the receiving CAS's type system
+ */
+public enum CasLoadMode {
+
+ /**
+  * Default operation:
+  *
+  * If TSI is available,
+  * reinitialize the CAS's type system and its indexes definition, except for Compressed Form 6,
+  * using the first TSI in this list:
+  *   - embedded
+  *   - external
+  * (to do this for Compressed Form 6, specify REINIT)
+  * Logic for doing embedded before external:
+  *   Examining each serialized form:
+  *     Java Object: if embedded is available, it's the right one, a different one causes exceptions
+  *     XCas, XMI: doesn't apply - no way to have embedded
+  *     Form 6 - excluded, anyway, see below
+  *     Form 4 and Binary: these require the serialized type system match the CASs, so the embedded one is always right.
+  *
+  * Compressed Form 6:
+  *   - decoding: use the first type system in this list:
+  *     - embedded TS/TSI
+  *     - external TS/TSI
+  *     - the receiving CAS's type system
+  *
+  * For all SerialFormats except Compressed type 6, default is to require strict matching (not lenient).
+  */
+ DEFAULT,
+
+ /**
+  * Same as DEFAULT, except for XMI and XCAS formats:
+  * Specifies lenient loading for those formats, which means that the
+  * load will not indicate an error if the incoming data has types and/or features not in the receiving CAS,
+  * but will instead silently ignore these.
+  */
+ LENIENT,
+
+ /**
+  * Mainly relevant for Compressed Form 6, which under DEFAULT does not reinitialize the CAS's definitions.
+  *
+  * Same as DEFAULT, except that the internal and / or external TSI is used to
+  * reinitialize the CAS's type system and its indexes definition,
+  * using the first TSI in this list:
+  *   - external (to allow the embedded to specify the decoding type system)
+  *   - embedded (if it is a TSI)
+  *
+  * Decode (same as DEFAULT)
+  *
+  * Error if no TSI information available
+  */
+ REINIT,
+ ;
+}
Modified:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
URL:
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
---
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
(original)
+++
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
Sat Aug 27 14:29:59 2016
@@ -23,6 +23,7 @@ import java.io.IOException;
import java.io.InputStream;
import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.SerialFormat;
import org.apache.uima.cas.impl.OutOfTypeSystemData;
import org.apache.uima.cas.impl.XCASDeserializer;
import org.apache.uima.cas.impl.XmiCasDeserializer;
@@ -59,7 +60,7 @@ public abstract class XmlCasDeserializer
}
/**
- * Deserializes a CAS from XMI.
+ * Deserializes a CAS from XMI or XCAS.
*
* @param aStream
* input stream from which to read the XML document
@@ -78,11 +79,40 @@ public abstract class XmlCasDeserializer
public static void deserialize(InputStream aStream, CAS aCAS, boolean
aLenient)
throws SAXException, IOException {
XMLReader xmlReader = XMLReaderFactory.createXMLReader();
- ContentHandler handler = new XmlCasDeserializerHandler(aCAS, aLenient);
+ XmlCasDeserializerHandler handler = new XmlCasDeserializerHandler(aCAS,
aLenient);
xmlReader.setContentHandler(handler);
xmlReader.parse(new InputSource(aStream));
}
+ /**
+  * Variant of deserialize that also reports which XML flavor (XMI or XCAS) was read.
+  *
+  * @param aStream
+  *          input stream from which to read the XML document
+  * @param aCAS
+  *          CAS into which to deserialize. This CAS must be set up with a type system that is
+  *          compatible with that in the XML
+  * @param aLenient
+  *          if true, unknown Types will be ignored. If false, unknown Types will cause an
+  *          exception. The default is false.
+  * @return SerialFormat.XMI when the handler's delegate is an XmiCasDeserializerHandler,
+  *         otherwise SerialFormat.XCAS
+  *
+  * @throws SAXException
+  *           if an XML Parsing error occurs
+  * @throws IOException
+  *           if an I/O failure occurs
+  */
+ static SerialFormat deserializeR(InputStream aStream, CAS aCAS, boolean aLenient)
+     throws SAXException, IOException {
+   XMLReader reader = XMLReaderFactory.createXMLReader();
+   XmlCasDeserializerHandler casHandler = new XmlCasDeserializerHandler(aCAS, aLenient);
+   reader.setContentHandler(casHandler);
+   reader.parse(new InputSource(aStream));
+   // after parsing, the delegate the handler ended up with tells which format was seen
+   if (casHandler.mDelegateHandler instanceof XmiCasDeserializer.XmiCasDeserializerHandler) {
+     return SerialFormat.XMI;
+   }
+   return SerialFormat.XCAS;
+ }
+
static class XmlCasDeserializerHandler extends DefaultHandler {
private CAS mCAS;
Propchange:
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:1690273-1757900