Author: schor
Date: Sat Aug 27 14:29:59 2016
New Revision: 1758045

URL: http://svn.apache.org/viewvc?rev=1758045&view=rev
Log:
[UIMA-4685] catchup merge from trunk

Added:
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java
Modified:
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
   (contents, props changed)
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
   (contents, props changed)
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
   (contents, props changed)
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
   (contents, props changed)
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
   (contents, props changed)
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
   (contents, props changed)
    
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/resources/org/apache/uima/UIMAException_Messages.properties

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/CASRuntimeException.java
 Sat Aug 27 14:29:59 2016
@@ -191,11 +191,17 @@ public class CASRuntimeException extends
    */
        public static final String BLOB_SERIALIZATION = "BLOB_SERIALIZATION";
 
+  /** Unrecognized serialized CAS format. */
+  public static final String UNRECOGNIZED_SERIALIZED_CAS_FORMAT = 
"UNRECOGNIZED_SERIALIZED_CAS_FORMAT";
+  
        /**
    * Error trying to read BLOB data from an input stream and deserialize 
into a CAS.
    */
        public static final String BLOB_DESERIALIZATION = 
"BLOB_DESERIALIZATION";
 
+  /** Deserializing Compressed Form 6 with CasLoadMode LENIENT, but no Type 
System provided. */
+  public static final String LENIENT_FORM_6_NO_TS = "LENIENT_FORM_6_NO_TS";
+  
        /** Error trying to open a stream to Sofa data. */
        public static final String SOFADATASTREAM_ERROR = 
"SOFADATASTREAM_ERROR";
 
@@ -254,6 +260,9 @@ public class CASRuntimeException extends
   /** While FS was in the index, illegal attempt to modify Feature "{0}" which 
is used as a key in one or more indexes; FS = "{1}" */
   public static final String ILLEGAL_FEAT_SET = "ILLEGAL_FEAT_SET";
   
+  /** Lenient deserialization not supported for input of type {0}. */
+  public static final String LENIENT_NOT_SUPPORTED = "LENIENT_NOT_SUPPORTED";
+  
   /** ll_setIntValue call to change the type: new type "{0}" must be a subtype 
of existing type {1}.*/
   public static final String ILLEGAL_TYPE_CHANGE = "ILLEGAL_TYPE_CHANGE";
   

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
 Sat Aug 27 14:29:59 2016
@@ -23,11 +23,89 @@ package org.apache.uima.cas;
  *
  */
 public enum SerialFormat {
-  UNKNOWN,
-  XCAS,         // with reachability filtering
-  XMI,          // with reachability filtering
-  BINARY,       // no filtering
-  COMPRESSED,   // no filtering  (form 4)
-  COMPRESSED_FILTERED,   // with reachability and type and feature filtering 
(form 6)
-  COMPRESSED_PROJECTION, // with subset of views
+  
+  /**
+   *  Unknown format 
+   */
+  UNKNOWN(""), 
+  
+  /**
+   * XCAS-format XML-serialized CAS
+   */
+  XCAS("xcas"),
+
+  /**
+   * XMI-format XML-serialized CAS
+   */
+  XMI("xmi"),
+
+  /**
+   * Plain custom binary serialized CAS without type system, no filtering
+   */
+  BINARY("bcas"),
+
+  /**
+   * Binary compressed CAS without type system, no filtering  (form 4)
+   */
+  COMPRESSED("bcas"),
+
+  /**
+   * Binary compressed CAS with reachability and type and feature filtering 
(form 6)
+   */
+  COMPRESSED_FILTERED("bcas"),
+
+  /**
+   * with subset of views (not in use)
+   */
+  COMPRESSED_PROJECTION("bcas"),
+
+  /**
+   * Java-serialized CAS without type system
+   */
+  SERIALIZED("scas"),
+
+  /**
+   * Java-serialized CAS with type system and index definitions
+   * The Type System and Index Definition replace the CAS's when 
deserializing.
+   */
+  SERIALIZED_TSI("scas"),
+
+  /**
+   * Binary compressed form 6 CAS with embedded type system
+   * representing the type system encoding the serialization
+   *
+   * specifies the type system used for the serialized form
+   */
+  COMPRESSED_FILTERED_TS("bcas"),
+  
+  /** 
+   * Type system and index specification included
+   * used to reinitialize the CAS and 
+   * specifies the type system used for the serialized form
+   */
+  COMPRESSED_FILTERED_TSI("bcas"),
+  
+  /**
+   * Plain custom binary serialized CAS, no filtering, plus serialized TSI
+   *   used to reinitialize the CAS
+   */
+  BINARY_TSI("bcas"),
+  
+  /**
+   * Binary Compressed Form 4, plus serialized TSI
+   *   used to reinitialize the CAS
+   */
+  COMPRESSED_TSI("bcas"),
+  ;
+  
+  private String defaultFileExtension;
+
+  SerialFormat(String defaultFileExtension) {
+    this.defaultFileExtension = defaultFileExtension;
+  }
+
+  public String getDefaultFileExtension() {
+    return defaultFileExtension;
+  }
+  
 }

Propchange: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/SerialFormat.java:1690273-1757435

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes.java
 Sat Aug 27 14:29:59 2016
@@ -22,6 +22,7 @@ package org.apache.uima.cas.impl;
 import java.io.DataInputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.ObjectInputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.Collection;
@@ -29,7 +30,6 @@ import java.util.Collections;
 import java.util.List;
 import java.util.Set;
 import java.util.function.Consumer;
-import java.util.function.IntConsumer;
 import java.util.function.IntFunction;
 
 import org.apache.uima.cas.CAS;
@@ -58,6 +58,7 @@ import org.apache.uima.jcas.cas.Sofa;
 import org.apache.uima.jcas.cas.StringArray;
 import org.apache.uima.jcas.cas.TOP;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasLoadMode;
 
 /**
  * Binary (mostly non compressed) CAS deserialization
@@ -244,6 +245,34 @@ public class BinaryCasSerDes {
     reinitIndexedFSs(fsIndex, false, i -> csds.addr2fs.get(i));
   }
 
+  public CASImpl setupCasFromCasMgrSerializer(CASMgrSerializer 
casMgrSerializer) {
+
+    if (null != casMgrSerializer) {
+  
+      TypeSystemImpl ts = casMgrSerializer.getTypeSystem();
+      baseCas.installTypeSystemInAllViews(ts);
+      baseCas.commitTypeSystem();
+  
+      // reset index repositories -- wipes out Sofa index
+      baseCas.indexRepository = casMgrSerializer.getIndexRepository(baseCas);
+      baseCas.indexRepository.commit();
+  
+      // get handle to existing initial View
+      CASImpl initialView = baseCas.getInitialView();
+  
+      // throw away all other View information as the CAS definition may have
+      // changed
+      baseCas.svd.sofa2indexMap.clear();
+      baseCas.svd.sofaNbr2ViewMap.clear();
+      baseCas.svd.viewCount = 0;
+  
+      // freshen the initial view
+      initialView.refreshView(baseCas, null);
+      baseCas.setViewForSofaNbr(1, initialView);
+      baseCas.svd.viewCount = 1;
+    }
+    return baseCas;
+  }
 
   /**
    * Deserializer for CASCompleteSerializer instances - includes type system 
and index definitions
@@ -421,18 +450,77 @@ public class BinaryCasSerDes {
    */
 
   public SerialFormat reinit(InputStream istream) throws CASRuntimeException {
-   
+      
     final DataInputStream dis = 
CommonSerDes.maybeWrapToDataInputStream(istream);
   
     try {
       Header h = CommonSerDes.readHeader(dis);
+      return reinit(h, istream, null, CasLoadMode.DEFAULT, null, 
AllowPreexistingFS.allow, null);
+    } catch (IOException e) {
+      String msg = e.getMessage();
+      if (msg == null) {
+        msg = e.toString();
+      }
+      throw new CASRuntimeException(CASRuntimeException.BLOB_DESERIALIZATION, 
msg);
+    }
+  }
+  
+  /**
+   * --------------------------------------------------------------------- 
+   * Deserialize a binary input stream, after reading the header, 
+   * and optionally an externally provided type system and index spec 
+   * used in compressed form 6 serialization previously
+   * 
+   * This reads in and deserializes CAS data from a stream. Byte swapping may 
be
+   * needed if the blob is from C++ -- C++ blob serialization writes data in
+   * native byte order.
+   * 
+   * The corresponding serialization code is in 
org.apache.uima.cas.impl.Serialization,
+   * also see CasIOUtils
+   * 
+   * @param h -
+   * @param istream -
+   * @param casMgrSerializer null or the Java object representing the 
externally supplied type 
+   *                         and maybe indexes definition (TSI)
+   * @param casLoadMode DEFAULT, REINIT, or (form 6 only) LENIENT. REINIT is required with compressed 
form 6 to
+   *                          reinitialize the cas's type system and index 
definition.
+   * @param f6 only used for form 6 where an instance of BinaryCasSerDes6 has 
been initialized
+   * @param allowPreexistingFS only used for form 6 delta deserialization    
+   * @return -
+   * @throws CASRuntimeException wraps IOException
+   */
+  public SerialFormat reinit(Header h, 
+                             InputStream istream, 
+                             CASMgrSerializer casMgrSerializer,
+                             CasLoadMode casLoadMode,
+                             BinaryCasSerDes6 f6,
+                             AllowPreexistingFS allowPreexistingFS,
+                             TypeSystemImpl ts) throws CASRuntimeException {
+  
+    final DataInputStream dis = 
CommonSerDes.maybeWrapToDataInputStream(istream);
+
+    CASMgrSerializer embeddedCasMgrSerializer = maybeReadEmbeddedTSI(h, dis);
+    
+    if (!h.isForm6() || casLoadMode == CasLoadMode.REINIT)  {
+      setupCasFromCasMgrSerializer(
+          (null != embeddedCasMgrSerializer && 
embeddedCasMgrSerializer.hasIndexRepository()) 
+            ? embeddedCasMgrSerializer
+            : casMgrSerializer);
+    }
       
+    if (!h.isForm6() && casLoadMode == CasLoadMode.LENIENT) {
+      /**Lenient deserialization not supported for input of type {0}.*/
+      throw new CASRuntimeException(CASRuntimeException.LENIENT_NOT_SUPPORTED, 
new Object[] {h.toString()});
+    }
+
+    try {
       final boolean delta = h.isDelta;
       
       if (!delta) {
         baseCas.resetNoQuestions();
       }
       
+      
       if (h.isCompressed) {
         if (TRACE_DESER) {
           System.out.format("BinDeser version = %d%n", h.v);
@@ -440,28 +528,64 @@ public class BinaryCasSerDes {
         if (h.form4) {
           (new BinaryCasSerDes4(baseCas.getTypeSystemImpl(), false))
             .deserialize(baseCas, dis, delta, h.v);
-          return SerialFormat.COMPRESSED;
+          return h.typeSystemIndexDefIncluded ? SerialFormat.COMPRESSED_TSI : 
SerialFormat.COMPRESSED;
         } else {
+          CASMgrSerializer cms = (embeddedCasMgrSerializer != null) ? 
embeddedCasMgrSerializer : casMgrSerializer; 
+          TypeSystemImpl tsRead = (cms != null) ? cms.getTypeSystem() : null;
+          if (null != tsRead) {
+            tsRead.commit();  // no generators set up
+          }
+            
+          TypeSystemImpl ts_for_decoding =
+              (tsRead != null && embeddedCasMgrSerializer != null) 
+                ? tsRead                      // first choice: embedded - it's 
always correct
+                : (ts != null)                // 2nd choice is passed in ts 
arg, either ts or f6.getTgtTs() 
+                    ? ts
+                    : (f6 != null && f6.getTgtTs() != null)
+                        ? f6.getTgtTs()       // this is the ts passed in via 
BinaryCasSerDes6 constructor
+                        : tsRead;             // last choice: the ts read from 
2nd input to load() in CasIOUtils
+              
           try {
-            (new BinaryCasSerDes6(baseCas)).deserializeAfterVersion(dis, 
delta, AllowPreexistingFS.allow);
+            BinaryCasSerDes6 bcsd = (f6 != null) 
+                ? new BinaryCasSerDes6(f6, ts_for_decoding)
+                : new BinaryCasSerDes6(baseCas, ts_for_decoding);
+            bcsd.deserializeAfterVersion(dis, delta, AllowPreexistingFS.allow);
+            return h.typeSystemIndexDefIncluded 
+                ? SerialFormat.COMPRESSED_FILTERED_TSI
+                : h.typeSystemIncluded 
+                    ? SerialFormat.COMPRESSED_FILTERED_TS
+                    : SerialFormat.COMPRESSED_FILTERED;
           } catch (ResourceInitializationException e) {
             throw new 
CASRuntimeException(CASRuntimeException.DESERIALIZING_COMPRESSED_BINARY_UNSUPPORTED,
 null, e);
           }
-          return SerialFormat.COMPRESSED_FILTERED;
         }
       }
      
       return binaryDeserialization(h);
-      
     } catch (IOException e) {
       String msg = e.getMessage();
       if (msg == null) {
         msg = e.toString();
       }
       throw new CASRuntimeException(CASRuntimeException.BLOB_DESERIALIZATION, 
msg);
+    }     
+    
+  }
+  
+  static CASMgrSerializer maybeReadEmbeddedTSI(Header h, DataInputStream dis) 
{  
+    if (h.isTypeSystemIncluded() || h.isTypeSystemIndexDefIncluded()) { // 
Load TS from CAS stream
+      try {
+        ObjectInputStream ois = new ObjectInputStream(dis);
+        return (CASMgrSerializer) ois.readObject();
+      } catch (ClassNotFoundException | IOException e) {
+        /**Unrecognized serialized CAS format*/
+        throw new 
CASRuntimeException(CASRuntimeException.UNRECOGNIZED_SERIALIZED_CAS_FORMAT, 
null, e);
+      }
     }
+    return null;
   }
 
+
   /************************************************************
   * ------   NON COMPRESSED BINARY DESERIALIZATION    ------ *
    *  For corresponding serialization code, see CASSerializer *

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
 Sat Aug 27 14:29:59 2016
@@ -37,7 +37,6 @@ import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
 import java.util.BitSet;
-import java.util.Collections;
 import java.util.List;
 import java.util.function.Consumer;
 import java.util.zip.Deflater;
@@ -340,12 +339,18 @@ public class BinaryCasSerDes4 implements
     
     Serializer serializer = new Serializer(
         casImpl, makeDataOutputStream(out), (MarkerImpl) trackingMark, sm,
-        compressLevel, compressStrategy);
+        compressLevel, compressStrategy, false);
    
     serializer.serialize();
     return sm;
   }
   
+  public void serializeWithTsi(CASImpl casImpl, Object out) throws IOException 
{
+    Serializer serializer = new Serializer(
+        casImpl, makeDataOutputStream(out), null, null, CompressLevel.Default, 
CompressStrat.Default, true);
+    serializer.serialize();
+  }
+  
   public SerializationMeasures serialize(AbstractCas cas, Object out, Marker 
trackingMark,
       CompressLevel compressLevel) throws IOException {
     return serialize(cas, out,trackingMark, compressLevel, 
CompressStrat.Default);
@@ -391,6 +396,7 @@ public class BinaryCasSerDes4 implements
 //    final private ByteHeap byteHeapObj;
 
     final private boolean isDelta;        // if true, there is a marker 
indicating the start spot(s)
+    final private boolean isTsi;          // true to include the type system 
and indexes definition
     final private boolean doMeasurement;  // if true, doing measurements
 //    final private ComprItemRefs fsStartIndexes = 
(CHANGE_FS_REFS_TO_SEQUENTIAL) ? new ComprItemRefs() : null;
 //    final private int[] typeCodeHisto = new int[ts.getTypeArraySize()]; 
@@ -460,7 +466,8 @@ public class BinaryCasSerDes4 implements
     private Serializer(CASImpl cas, DataOutputStream serializedOut, MarkerImpl 
mark,
                        SerializationMeasures sm,
                        CompressLevel compressLevel,
-                       CompressStrat compressStrategy) {
+                       CompressStrat compressStrategy,
+                       boolean isTsi) {
       this.baseCas = cas.getBaseCAS();
       this.bcsd = cas.getBinaryCasSerDes();
       this.isDelta = (mark != null);
@@ -473,6 +480,7 @@ public class BinaryCasSerDes4 implements
       this.sm = sm;
       this.compressLevel = compressLevel;
       this.compressStrategy = compressStrategy;
+      this.isTsi = isTsi;
       
       doMeasurement = (sm != null);
       
@@ -532,16 +540,17 @@ public class BinaryCasSerDes4 implements
 //        sm.origAuxLongs = baseCas.getLongHeap().getSize() * 8;
 //        sm.totalTime = System.currentTimeMillis();
 //      }
-
+      
       /************************
        * Write standard header
        ************************/
       CommonSerDes.createHeader()
-      .v3()
-      .seqVer(2)    // 0 - original, 1 - UIMA-4743, 2 - v3 
-      .form4()
-      .delta(isDelta)
-      .write(serializedOut);
+        .v3()
+        .seqVer(2)    // 0 - original, 1 - UIMA-4743, 2 - v3 
+        .form4()
+        .delta(isDelta)
+        .typeSystemIndexDefIncluded(isTsi)
+        .write(serializedOut);
              
       if (TRACE_SER) System.out.println("Form4Ser start, delta: " + (isDelta ? 
"true" : "false"));
       
/*******************************************************************************
@@ -983,14 +992,14 @@ public class BinaryCasSerDes4 implements
           DeflaterOutputStream cds = new DeflaterOutputStream(baosZipped, 
deflater, zipBufSize);       
           baos.writeTo(cds);
           cds.close();
-          idxAndLen.add(i);
+          idxAndLen.add(Integer.valueOf(i));
           if (doMeasurement) {
-            idxAndLen.add((int)(sm.statDetails[i].afterZip = 
deflater.getBytesWritten()));            
-            idxAndLen.add((int)(sm.statDetails[i].beforeZip = 
deflater.getBytesRead()));
+            idxAndLen.add(Integer.valueOf((int)(sm.statDetails[i].afterZip = 
deflater.getBytesWritten())));            
+            idxAndLen.add(Integer.valueOf((int)(sm.statDetails[i].beforeZip = 
deflater.getBytesRead())));
             sm.statDetails[i].zipTime = System.currentTimeMillis() - startTime;
           } else {
-            idxAndLen.add((int)deflater.getBytesWritten());            
-            idxAndLen.add((int)deflater.getBytesRead());
+            idxAndLen.add(Integer.valueOf((int)deflater.getBytesWritten()));   
         
+            idxAndLen.add(Integer.valueOf((int)deflater.getBytesRead()));
           }
         } 
       }
@@ -1672,7 +1681,8 @@ public class BinaryCasSerDes4 implements
     }
     
     private void deserialize(int version1) throws IOException {
-      
+      if (TRACE_DES) System.out.println("Form4Deser starting");
+     
       if (TRACE_DES) System.out.println("Form4Deser starting");
       isBeforeV3 = (version1 & 0xff00) == 0;
       

Propchange: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes4.java:1690273-1757904

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
 Sat Aug 27 14:29:59 2016
@@ -36,6 +36,7 @@ import java.io.FileNotFoundException;
 import java.io.FileOutputStream;
 import java.io.IOException;
 import java.io.InputStream;
+import java.io.ObjectOutputStream;
 import java.io.OutputStream;
 import java.util.ArrayList;
 import java.util.Arrays;
@@ -81,6 +82,8 @@ import org.apache.uima.jcas.cas.Sofa;
 import org.apache.uima.jcas.cas.StringArray;
 import org.apache.uima.jcas.cas.TOP;
 import org.apache.uima.resource.ResourceInitializationException;
+import org.apache.uima.util.CasIOUtils;
+import org.apache.uima.util.CasLoadMode;
 import org.apache.uima.util.impl.DataIO;
 import org.apache.uima.util.impl.OptimizeStrings;
 import org.apache.uima.util.impl.SerializationMeasures;
@@ -315,9 +318,17 @@ public class BinaryCasSerDes6 implements
   private OptimizeStrings os;
   private boolean only1CommonString;  // true if only one common string
 
+  private boolean isTsIncluded;   // type system used for the serialization
+  private boolean isTsiIncluded;  // types plus index definition, used to 
reset the cas
+
 //  private TypeInfo typeInfo; // type info for the current type being 
serialized/deserialized
 //                             // always the "src" typeInfo I think, except 
for compareCas use
   final private CasTypeSystemMapper typeMapper;
+  
+  /**
+   * This is the used version of isTypeMapping, normally == to isTypeMappingCmn
+   *   But compareCASes sets this false temporarily while setting up the 
compare
+   */
   private boolean isTypeMapping;
 
 //  /**
@@ -498,6 +509,7 @@ public class BinaryCasSerDes6 implements
    * @param compressStrategy if not null, specifies enum instance for compress 
strategy
    * @throws ResourceInitializationException if the target type system is 
incompatible with the source type system
    */
+   
   public BinaryCasSerDes6(
       AbstractCas aCas,
       MarkerImpl mark,
@@ -506,6 +518,19 @@ public class BinaryCasSerDes6 implements
       boolean doMeasurements,
       CompressLevel compressLevel, 
       CompressStrat compressStrategy) throws ResourceInitializationException {
+    this(aCas, mark, tgtTs, false, false, rfs, doMeasurements, compressLevel, 
compressStrategy);
+  }
+   
+  private BinaryCasSerDes6(
+      AbstractCas aCas,
+      MarkerImpl mark,
+      TypeSystemImpl tgtTs,
+      boolean storeTS,
+      boolean storeTSI,
+      ReuseInfo rfs,
+      boolean doMeasurements,
+      CompressLevel compressLevel, 
+      CompressStrat compressStrategy) throws ResourceInitializationException {
     cas = ((CASImpl) ((aCas instanceof JCas) ? ((JCas)aCas).getCas(): 
aCas)).getBaseCAS();
     bcsd = cas.getBinaryCasSerDes();
     
@@ -522,6 +547,8 @@ public class BinaryCasSerDes6 implements
     isDelta = isSerializingDelta = (mark != null);
     typeMapper = srcTs.getTypeSystemMapper(tgtTs);
     isTypeMapping = (null != typeMapper);
+    isTsIncluded = storeTS;
+    isTsiIncluded = storeTSI;
     
 //    heap = cas.getHeap().heap;
 //    heapEnd = cas.getHeap().getCellsUsed();
@@ -554,12 +581,55 @@ public class BinaryCasSerDes6 implements
   }
   
   /**
+   * only called to set up for deserialization.
+   * clones existing f6, but changes the tgtTs (used to decode)
+   * @param f6 -
+   * @param tgtTs used for decoding
+   * @throws ResourceInitializationException -
+   */
+  BinaryCasSerDes6(BinaryCasSerDes6 f6, TypeSystemImpl tgtTs) throws 
ResourceInitializationException {
+    this.cas = f6.cas;
+    this.bcsd = f6.bcsd;
+    this.stringHeapObj = f6.stringHeapObj;
+    this.nextFsId = f6.nextFsId;
+
+    this.srcTs = f6.srcTs;
+    this.tgtTs = tgtTs;  // passed in argument !
+    this.compressLevel = f6.compressLevel;
+    this.compressStrategy = f6.compressStrategy;
+
+    this.mark = f6.mark;
+    if (null != mark && !mark.isValid() ) {
+      throw new CASRuntimeException(
+                CASRuntimeException.INVALID_MARKER, "Invalid Marker.");
+    }
+    
+    this.isDelta = this.isSerializingDelta = (mark != null);
+    this.fsStartIndexes = f6.fsStartIndexes;
+    this.reuseInfoProvided = f6.reuseInfoProvided;
+    this.doMeasurements = f6.doMeasurements;
+    this.sm = f6.sm;
+    
+    this.isTsIncluded = f6.isTsIncluded;
+    this.isTsiIncluded = f6.isTsiIncluded;
+    
+    this.typeMapper = srcTs.getTypeSystemMapper(tgtTs);
+    this.isTypeMapping = (null != typeMapper);
+    this.prevHeapInstanceWithIntValues = f6.prevHeapInstanceWithIntValues;
+    this.prevFsWithLongValues = f6.prevFsWithLongValues;
+    this.foundFSs = f6.foundFSs;
+    this.foundFSsBelowMark = f6.foundFSsBelowMark;
+    this.fssToSerialize = f6.fssToSerialize;
+    
+   }
+  
+  /**
    * Setup to serialize (not delta) or deserialize (not delta) using binary 
compression, no type mapping but only processing reachable Feature Structures
    * @param cas -
    * @throws ResourceInitializationException never thrown 
    */
   public BinaryCasSerDes6(AbstractCas cas) throws 
ResourceInitializationException {
-    this(cas, null, null, null, false, CompressLevel.Default, 
CompressStrat.Default);
+    this(cas, null, null, false, false, null, false, CompressLevel.Default, 
CompressStrat.Default);
   }
   
   /**
@@ -569,7 +639,7 @@ public class BinaryCasSerDes6 implements
    * @throws ResourceInitializationException if the target type system is 
incompatible with the source type system
    */
   public BinaryCasSerDes6(AbstractCas cas, TypeSystemImpl tgtTs) throws 
ResourceInitializationException {
-    this(cas, null, tgtTs, null, false, CompressLevel.Default, 
CompressStrat.Default);
+    this(cas, null, tgtTs, false, false, null, false, CompressLevel.Default, 
CompressStrat.Default);
   }
 
   /**
@@ -581,7 +651,7 @@ public class BinaryCasSerDes6 implements
    * @throws ResourceInitializationException if the target type system is 
incompatible with the source type system
    */
   public BinaryCasSerDes6(AbstractCas cas, MarkerImpl mark, TypeSystemImpl 
tgtTs, ReuseInfo rfs) throws ResourceInitializationException {
-    this(cas, mark, tgtTs, rfs, false, CompressLevel.Default, 
CompressStrat.Default);
+    this(cas, mark, tgtTs, false, false, rfs, false, CompressLevel.Default, 
CompressStrat.Default);
   }
   
   /**
@@ -594,7 +664,7 @@ public class BinaryCasSerDes6 implements
    * @throws ResourceInitializationException if the target type system is 
incompatible with the source type system
    */
   public BinaryCasSerDes6(AbstractCas cas, MarkerImpl mark, TypeSystemImpl 
tgtTs, ReuseInfo rfs, boolean doMeasurements) throws 
ResourceInitializationException {
-    this(cas, mark, tgtTs, rfs, doMeasurements, CompressLevel.Default, 
CompressStrat.Default);
+    this(cas, mark, tgtTs, false, false, rfs, doMeasurements, 
CompressLevel.Default, CompressStrat.Default);
   }
 
   /**
@@ -604,7 +674,19 @@ public class BinaryCasSerDes6 implements
    * @throws ResourceInitializationException never thrown
    */
   public BinaryCasSerDes6(AbstractCas cas, ReuseInfo rfs) throws 
ResourceInitializationException {
-    this(cas, null, null, rfs, false, CompressLevel.Default, 
CompressStrat.Default);
+    this(cas, null, null, false, false, rfs, false, CompressLevel.Default, 
CompressStrat.Default);
+  }
+
+  /**
+   * Setup to serialize (not delta) or deserialize (maybe delta) using binary 
compression, no type mapping, optionally storing TSI, and only processing 
reachable Feature Structures
+   * @param cas -
+   * @param rfs -
+   * @param storeTS - 
+   * @param storeTSI - 
+   * @throws ResourceInitializationException never thrown
+   */
+  public BinaryCasSerDes6(AbstractCas cas, ReuseInfo rfs, boolean storeTS, 
boolean storeTSI) throws ResourceInitializationException {
+    this(cas, null, null, storeTS, storeTSI, rfs, false, 
CompressLevel.Default, CompressStrat.Default);
   }
 
   
/*********************************************************************************************
@@ -1592,7 +1674,7 @@ public class BinaryCasSerDes6 implements
    * @throws IOException -
    */
   public void deserialize(InputStream istream) throws IOException {
-    readHeader(istream);
+    Header h = readHeader(istream);  // side effect, sets deserIn
 
     if (isReadingDelta) {
       if (!reuseInfoProvided) {
@@ -1601,8 +1683,9 @@ public class BinaryCasSerDes6 implements
     } else {
       cas.resetNoQuestions();
     }
-
-    deserializeAfterVersion(deserIn, isReadingDelta, AllowPreexistingFS.allow);
+      
+    bcsd.reinit(h, deserIn, null, CasLoadMode.DEFAULT, this, 
AllowPreexistingFS.allow, null);
+//    deserializeAfterVersion(deserIn, isReadingDelta, 
AllowPreexistingFS.allow);
   }
   
   /**
@@ -1612,7 +1695,7 @@ public class BinaryCasSerDes6 implements
    * @throws IOException passthru
    */
   public void deserialize(InputStream istream, AllowPreexistingFS 
allowPreexistingFS) throws IOException {
-    readHeader(istream);
+    Header h = readHeader(istream);
 
     if (isReadingDelta) {
       if (!reuseInfoProvided) {
@@ -1622,7 +1705,7 @@ public class BinaryCasSerDes6 implements
       throw new UnsupportedOperationException("Delta CAS required for this 
call");
     }
 
-    deserializeAfterVersion(deserIn, isReadingDelta, allowPreexistingFS);
+    bcsd.reinit(h, deserIn, null, CasLoadMode.DEFAULT, this, 
allowPreexistingFS, null);
   }
   
   
@@ -3751,7 +3834,7 @@ public class BinaryCasSerDes6 implements
    * @throws IOException passthru
    *********************************************/
   
-  private void readHeader(InputStream istream) throws IOException {
+  private Header readHeader(InputStream istream) throws IOException {
     deserIn = CommonSerDes.maybeWrapToDataInputStream(istream);
     Header h = CommonSerDes.readHeader(deserIn);
     
@@ -3765,9 +3848,7 @@ public class BinaryCasSerDes6 implements
     }
     
     isReadingDelta = isDelta = h.isDelta;
-    
-    
-    
+    return h;
   }
   
   /* *******************************************
@@ -3843,6 +3924,10 @@ public class BinaryCasSerDes6 implements
     return v; 
   }
   
+  TypeSystemImpl getTgtTs() {
+    return this.tgtTs;
+  }
+  
   // number of views:              cas.getNumberOfViews()
   // number of sofas               cas.getNumberOfSofas()
   // [sofa-1 ... sofa-n]           cas.getSofaIterator()

Propchange: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/BinaryCasSerDes6.java:1690273-1757885

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASImpl.java
 Sat Aug 27 14:29:59 2016
@@ -295,7 +295,7 @@ public class CASImpl extends AbstractCas
     
     // A map from SofaNumbers which are also view numbers to IndexRepositories.
     // these numbers are dense, and start with 1.  1 is the initial view.  0 
is the base cas
-    private ArrayList<FSIndexRepositoryImpl> sofa2indexMap;
+    ArrayList<FSIndexRepositoryImpl> sofa2indexMap;
 
 
     /**
@@ -309,7 +309,7 @@ public class CASImpl extends AbstractCas
      * However, the maximum view count is reset; so creation of new views 
"reuses" these pre-setup indexRepos 
      * associated with these views.
      */
-    private ArrayList<CASImpl> sofaNbr2ViewMap;
+    ArrayList<CASImpl> sofaNbr2ViewMap;
 
     /**
      * a set of instantiated sofaNames

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
 Sat Aug 27 14:29:59 2016
@@ -42,6 +42,7 @@ import org.apache.uima.jcas.cas.LongArra
 import org.apache.uima.jcas.cas.ShortArray;
 import org.apache.uima.jcas.cas.StringArray;
 import org.apache.uima.jcas.cas.TOP;
+import org.apache.uima.util.CasIOUtils;
 
 /**
  * This object has 2 purposes.
@@ -219,6 +220,10 @@ public class CASSerializer implements Se
       dos.writeInt(shdh.refHeap[i + 
StringHeapDeserializationHelper.CHAR_HEAP_STRLEN_OFFSET]);
     }
   }
+  
+  void addTsiCAS(CASImpl cas, OutputStream ostream) {
+    
+  }
 
   /**
    * Serializes the CAS data and writes it to the output stream.
@@ -249,6 +254,10 @@ public class CASSerializer implements Se
    * @param ostream -
    */
   public void addCAS(CASImpl cas, OutputStream ostream) {
+      addCAS(cas, ostream, false);
+  }
+  
+  public void addCAS(CASImpl cas, OutputStream ostream, boolean includeTsi) {
     final BinaryCasSerDes bcsd = cas.getBinaryCasSerDes();
     
     final CommonSerDesSequential csds = 
BinaryCasSerDes4.getCsds(cas.getBaseCAS(), false);  // saves the csds in the 
cas, used for delta
@@ -264,9 +273,14 @@ public class CASSerializer implements Se
       // output the key and version number
       CommonSerDes.createHeader()
         .seqVer(1)  // 0 original, 1 UIMA-4743
+        .typeSystemIndexDefIncluded(includeTsi)
         .v3()
         .write(dos);
-      
+
+     if (includeTsi) {
+        CasIOUtils.writeTypeSystem(cas, ostream, true);
+      }
+            
       // output the FS heap
       final int heapSize = bcsd.heap.getCellsUsed();
       dos.writeInt(heapSize);
@@ -536,7 +550,7 @@ public class CASSerializer implements Se
       writeMods(chgByteAddr, dos, i -> dos.writeByte(chgByteValues.heap[i]));
 
       // word alignment
-      align = (4 - (byteheapsz % 4)) % 4;
+      align = (4 - (chgByteAddr.size() % 4)) % 4;
       for (int i = 0; i < align; i++) {
         dos.writeByte(0);
       }
@@ -545,7 +559,7 @@ public class CASSerializer implements Se
       writeMods(chgShortAddr, dos, i -> 
dos.writeShort(chgShortValues.heap[i]));
 
       // word alignment
-      if (shortheapsz % 2 != 0) {
+      if (chgShortAddr.size() % 2 != 0) {
         dos.writeShort(0);
       }
 

Propchange: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/CASSerializer.java:1690273-1757907

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/CommonSerDes.java
 Sat Aug 27 14:29:59 2016
@@ -26,6 +26,8 @@ import java.io.InputStream;
 import java.io.OutputStream;
 import java.nio.ByteBuffer;
 
+import org.apache.uima.UIMARuntimeException;
+
 /**
  * Common de/serialization 
  */
@@ -48,6 +50,7 @@ public class CommonSerDes {
    *     - bit in 0x01 position: on for binary non-delta (redundant)   
    *     - bit in 0x02 position: on means delta, off - not delta
    *     - bit in 0x04 position: on means compressed, off means plain binary
+   *     - bit in 0x08 position: on means type system and index definitions (TSI) included
    *     - bits  0xF8 reserved
    *     
    *     - byte in 0xFF 00 position: incrementing (starting w/ 0) version
@@ -63,12 +66,14 @@ public class CommonSerDes {
    *     - bit in 0x01 position: on means form6, off = form 4 
    *********************************************/
   
-  static class Header {
+  public static class Header {
     boolean isDelta;
     boolean isCompressed;
     boolean isV3style;
     boolean form4;
     boolean form6;
+    boolean typeSystemIncluded;  // for form 6, TS only
+    boolean typeSystemIndexDefIncluded;
     byte seqVersionNbr;
     boolean isV3;
     boolean swap;
@@ -77,18 +82,21 @@ public class CommonSerDes {
     
     Reading reading;
     
-    Header delta() {isDelta = true;  return this; }
-    Header delta(boolean v2) {isDelta = v2;  return this; }
-    Header form4() {isCompressed = form4 = true; form6 = false; return this; }
-    Header form6() {isCompressed = form6 = true; form4 = false; return this; }
-    Header seqVer(int v2) { assert (v2 >= 0 && v2 < 256); seqVersionNbr = 
(byte)v2; return this; }
-    Header v3() {isV3 = true; return this; }
+    public Header delta() {isDelta = true;  return this; }
+    public Header delta(boolean v2) {isDelta = v2;  return this; }
+    public Header form4() {isCompressed = form4 = true; form6 = false; return 
this; }
+    public Header form6() {isCompressed = form6 = true; form4 = false; return 
this; }
+    public Header typeSystemIncluded(boolean f) {typeSystemIncluded = f; 
return this; }
+    public Header typeSystemIndexDefIncluded(boolean f) 
{typeSystemIndexDefIncluded = f; return this; }
+    public Header seqVer(int v2) { assert (v2 >= 0 && v2 < 256); seqVersionNbr 
= (byte)v2; return this; }
+    public Header v3() {isV3 = true; return this; }
     
     
-    void write(DataOutputStream dos) throws IOException {
+    public void write(DataOutputStream dos) throws IOException {
       v = (!isCompressed && !isDelta) ? 1 : 0;
       if (isDelta) v |= 0x02;
       if (isCompressed) v |= 0x04;
+      if (typeSystemIndexDefIncluded) v |= 0x08;
       v |= (seqVersionNbr << 8);
       if (isV3) v |= 0x010000;
       
@@ -107,15 +115,66 @@ public class CommonSerDes {
       if (isCompressed) {
         dos.writeInt(form6 ? 1 : 0);
       }
+      
+    }
+    
+    public boolean isDelta() {
+      return isDelta;
+    }
+    public boolean isCompressed() {
+      return isCompressed;
     }
+    public boolean isV3style() {
+      return isV3style;
+  }
+    public boolean isForm4() {
+      return form4;
+    }
+    public boolean isForm6() {
+      return form6;
+    }
+    public boolean isTypeSystemIndexDefIncluded() {
+      return typeSystemIndexDefIncluded;
+    }
+    public boolean isTypeSystemIncluded() {
+      return typeSystemIncluded;
+    }    
+    public byte getSeqVersionNbr() {
+      return seqVersionNbr;
+    }
+    public boolean isV3() {
+      return isV3;
+    }
+
+    
   }
   
-  static Header createHeader() {
+  public static Header createHeader() {
     return new Header();
   }
   
+  public static boolean isBinaryHeader(DataInputStream dis) {
+    dis.mark(4);
+    byte[] bytebuf = new byte[4];
+    try {
+      bytebuf[0] = dis.readByte(); // U
+      bytebuf[1] = dis.readByte(); // I
+      bytebuf[2] = dis.readByte(); // M
+      bytebuf[3] = dis.readByte(); // A
+      String s = new String(bytebuf, "UTF-8");
+      return s.equals("UIMA") || s.equals("AMIU");
+    } catch (IOException e) {
+      return false;
+    } finally {
+      try {
+        dis.reset();
+      } catch (IOException e) {
+        throw new UIMARuntimeException(e);
+      }
+    }
+  }
   
-  static Header readHeader(DataInputStream dis) throws IOException {
+  public static Header readHeader(DataInputStream dis) throws IOException {
 
     Header h = new Header();
     // key
@@ -134,6 +193,7 @@ public class CommonSerDes {
     
     h.isDelta = (v & 2) != 0;
     h.isCompressed = (v & 4) != 0;
+    h.typeSystemIndexDefIncluded = (v & 8) != 0;
     h.seqVersionNbr = (byte) ((v & 0xFF00) >> 8);
    
     if (h.isCompressed) {
@@ -145,25 +205,25 @@ public class CommonSerDes {
     return h;
   }
 
-  static DataOutputStream maybeWrapToDataOutputStream(OutputStream os) {
+  public static DataOutputStream maybeWrapToDataOutputStream(OutputStream os) {
     if (os instanceof DataOutputStream) {
       return (DataOutputStream) os;
     }
     return new DataOutputStream(os);
   }
   
-  static DataInputStream maybeWrapToDataInputStream(InputStream is) {
-    if (is instanceof DataInputStream) {
-      return (DataInputStream) is;
+  public static DataInputStream maybeWrapToDataInputStream(InputStream os) {
+    if (os instanceof DataInputStream) {
+      return (DataInputStream) os;
     }
-    return new DataInputStream(is);
+    return new DataInputStream(os);
   }
 
   /** 
    * byte swapping reads of integer forms
    */
  
-  static class Reading {
+  public static class Reading {
     final DataInputStream dis;
     final boolean swap;
     

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
 Sat Aug 27 14:29:59 2016
@@ -42,7 +42,10 @@ import org.apache.uima.resource.Resource
  *   - one which makes use of various custom binary serialization methods, and
  *   - one which just converts CAS and related objects into other objects which
  *     in turn are serializable by normal Java Object serialization.
- *      
+ * 
+ * See also CasIOUtils, which has static methods for serialization and 
deserialization, including 
+ * support for XMI and XCAS.
+ *    
  */
 public class Serialization {
 
@@ -92,6 +95,19 @@ public class Serialization {
             .getBaseIndexRepository());
     return ser;
   }
+  
+  /**
+   * Convert a Type System into a 
+   * CASMgrSerializer object which can be serialized
+   * 
+   * @param casMgr the type system and index repo definitions
+   * @return a serializable object version of these
+   */
+  public static CASMgrSerializer serializeCASMgrTypeSystemOnly(CASMgr casMgr) {
+    CASMgrSerializer ser = new CASMgrSerializer();
+    ser.addTypeSystem((TypeSystemImpl) casMgr.getCAS().getTypeSystem());
+    return ser;
+  }
 
   /**
    * Convert a CAS + the type system and index definitions into a
@@ -213,6 +229,26 @@ public class Serialization {
   }
   
   /**
+   * Serialize in compressed binary with type filtering
+   * This method can use type filtering to omit sending those types and/or 
features not present in the target type system.
+   *   - To omit type filtering, use null for the target type system
+   * It also only sends those feature structures which are reachable either 
from an index or references from other reachable feature structures.
+   * 
+   * @param cas the CAS to serialize
+   * @param out an OutputStream, a DataOutputStream, or a File
+   * @param includeTS true to serialize the type system
+   * @param includeTSI true to serialize the type system and the indexes 
definition
+   * @return information to be used on subsequent serializations (to save 
time) or deserializations (for receiving delta CASs), or reserializations (if 
sending delta CASs)
+   * @throws IOException if IO exception
+   * @throws ResourceInitializationException if target type system is 
incompatible with this CAS's type system
+   */  
+  public static ReuseInfo serializeWithCompression(CAS cas, Object out, 
boolean includeTS, boolean includeTSI) throws IOException, 
ResourceInitializationException {
+    BinaryCasSerDes6 bcs = new BinaryCasSerDes6(cas, null, includeTS, 
includeTSI);
+    bcs.serialize(out);
+    return bcs.getReuseInfo();
+  }
+  
+  /**
    * Serialize in compressed binary with type filtering
    * This method can use type filtering to omit sending those types and/or 
features not present in the target type system.
    *   - To omit type filtering, use null for the target type system

Propchange: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/cas/impl/Serialization.java:1690273-1757906

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/cas/impl/XmiCasDeserializer.java
 Sat Aug 27 14:29:59 2016
@@ -91,7 +91,7 @@ public class XmiCasDeserializer {
 
   private static final String ID_ATTR_NAME = "xmi:id";
 
-  private class XmiCasDeserializerHandler extends DefaultHandler {
+  public class XmiCasDeserializerHandler extends DefaultHandler {
     // ///////////////////////////////////////////////////////////////////////
     // Internal states for the parser.
 

Added: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java?rev=1758045&view=auto
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java
 (added)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasIOUtils.java
 Sat Aug 27 14:29:59 2016
@@ -0,0 +1,542 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.util;
+
+import static org.apache.uima.cas.impl.Serialization.serializeCAS;
+import static org.apache.uima.cas.impl.Serialization.serializeWithCompression;
+
+import java.io.BufferedInputStream;
+import java.io.Closeable;
+import java.io.DataInputStream;
+import java.io.IOException;
+import java.io.InputStream;
+import java.io.ObjectInputStream;
+import java.io.ObjectOutputStream;
+import java.io.OutputStream;
+import java.net.URL;
+import java.util.Arrays;
+
+import org.apache.uima.UIMARuntimeException;
+import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.CASRuntimeException;
+import org.apache.uima.cas.SerialFormat;
+import org.apache.uima.cas.TypeSystem;
+import org.apache.uima.cas.admin.CASMgr;
+import org.apache.uima.cas.impl.AllowPreexistingFS;
+import org.apache.uima.cas.impl.BinaryCasSerDes;
+import org.apache.uima.cas.impl.BinaryCasSerDes4;
+import org.apache.uima.cas.impl.CASCompleteSerializer;
+import org.apache.uima.cas.impl.CASImpl;
+import org.apache.uima.cas.impl.CASMgrSerializer;
+import org.apache.uima.cas.impl.CASSerializer;
+import org.apache.uima.cas.impl.CommonSerDes;
+import org.apache.uima.cas.impl.CommonSerDes.Header;
+import org.apache.uima.cas.impl.Serialization;
+import org.apache.uima.cas.impl.TypeSystemImpl;
+import org.apache.uima.cas.impl.XCASSerializer;
+import org.apache.uima.cas.impl.XmiCasSerializer;
+import org.xml.sax.SAXException;
+
+/**
+ * <p>A collection of static methods aimed at making it easy to</p>
+ * <ul>
+ *   <li>save and load CASes, and to</li>
+ *   <li>optionally include the CAS's Type System (abbreviated TS (only 
available for Compressed Form 6)) and optionally also include the CAS's indexes 
definition.</li>
+ *   <li>The combination of Type System and Indexes definition is called TSI.
+ *     <ul>
+ *       <li>The TSI's purpose: to replace the CAS's existing type system and 
index definition.</li>
+ *       <li>The TS's purpose: to specify the type system used in the 
serialized data for format Compressed Form 6, in order to allow deserializing 
into some other type system in the CAS, leniently.</li>
+ *     </ul>
+ *   </li>
+ * </ul>
+ *
+ * <p>TSI information can be</p>
+ * <ul>
+ *   <li>embedded</li>
+ *   <li>externally supplied (via another input source to the load)</li>
+ *   <li>both embedded and externally supplied.&nbsp; In this case the 
embedded takes precedence.</li>
+ * </ul>
+ *
+ * <p>TS information is available embedded, for COMPRESSED_FILTERED_TS format,
+ *    and also from embedded or external TSI information (since it also 
contains the type system information).</p>
+ *
+ * <p>When an external TSI is supplied while loading Compressed Form 6,</p>
+ * <ul>
+ *   <li>for COMPRESSED_FILTERED_TS
+ *     <ul>
+ *       <li>it uses the embedded TS for decoding</li>
+ *       <li>it uses the external TSI to replace the CAS's existing type 
system and index definition if CasLoadMode == REINIT.</li>
+ *     </ul>
+ *   </li>
+ *   <li>for COMPRESSED_FILTERED_TSI
+ *     <ul>
+ *       <li>the external TSI is ignored, the embedded one overrides, but 
otherwise operates as above.</li> 
+ *     </ul>
+ *   </li>
+ *   <li>for COMPRESSED_FILTERED
+ *     <ul>
+ *       <li>the external TSI's type system part is used for decoding.</li>
+ *       <li>if CasLoadMode == REINIT, the external TSI is also used to 
replace the CAS's existing type system and index definition.</li>
+ *     </ul>
+ *   </li>
+ * </ul>
+ *
+ * <p>Compressed Form 6 loading decoding type system is picked from these 
sources, in this order:</p>
+ * <ul>
+ *   <li>a passed in type system</li>
+ *   <li>an embedded TS or TSI</li>
+ *   <li>an external TSI</li>
+ *   <li>the CAS's type system</li>
+ * </ul>
+ *
+ * <p>The serialization formats supported here are specified in the 
SerialFormat enum.</p>
+ *
+ * <p>The <code>load </code>api's automatically use the appropriate 
deserializers, based on the input data format.</p>
+ *
+ * <p>Loading inputs may be supplied as URLs or as an appropriately buffered 
InputStream.</p>
+ *
+ * <p>Note: you can use Files or Paths by converting these to URLs:</p>
+ * <ul>
+ *   <li><code>URL url = a_path.toUri().toURL();</code></li>
+ *   <li><code>URL url = a_file.toURI().toURL();</code></li>
+ * </ul>
+ *
+ * <p>When loading, an optional CasLoadMode enum value may be specified to 
indicate</p>
+ * <ul>
+ *   <li>LENIENT loading - used with XCas and XMI data sources to 
silently ignore types and features present in the serialized form, but not in 
the receiving type system.</li>
+ *   <li>REINIT - used with Compressed Form 6 loading to indicate that&nbsp; 
if no embedded TSI information is available, the external TSI is to be used to 
replace the CAS's existing type system and index definition.</li>
+ * </ul>
+ *
+ * <p style="padding-left: 30px;">For more details, see the Javadocs for 
CasLoadMode.</p>
+ *
+ * <p>When TS or TSI information is saved, it is either saved in the same 
destination (e.g. file or stream), or in a separate one.</p>
+ * <ul>
+ *   <li>The serialization formats ending in _TSI and _TS support saving the 
TSI (or TS) in the same destination.</li>
+ *   <li>The save APIs for other formats can optionally also save the TSI into 
a separate (second) OutputStream.</li>
+ * </ul>
+ *
+ * <p>Summary of APIs for saving:</p>
+ * <pre style="padding-left: 30px;">
+ *   <code>save(aCAS, outputStream, aSerialFormat)</code>
+ *   <code>save(aCAS, outputStream, tsiOutputStream, 
aSerialFormat)</code></pre>
+ *
+ * <p>Summary of APIs for loading:</p>
+ * <pre style="padding-left: 30px;">
+ *   <code>load(aURL&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; , aCas)</code>
+ *   <code>load(inputStream, aCas)</code>
+ *   <code>load(inputStream, aCas, typeSystem)</code> // typeSystem used for 
decoding Compressed Form 6
+ *   <code>load(inputStream, tsiInputStream, aCas)</code></pre>
+ * <pre style="padding-left: 30px;">
+ *   <code>load(aURL&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; , 
tsiURL&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; , aCAS, 
casLoadMode)&nbsp;&nbsp; - the second URL is for loading a separately-stored 
TSI</code>
+ *   <code>load(inputStream, tsiInputStream, aCAS, aCasLoadMode)</code>
+ *   <code>load(aURL&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; , 
tsiURL&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; , aCAS, 
lenient)&nbsp;&nbsp;&nbsp;&nbsp;&nbsp;&nbsp; - lenient is used to set the 
CasLoadMode to LENIENT or DEFAULT</code>
+ *   <code>load(inputStream, tsiInputStream, aCAS, lenient)</code></pre>
+ */
+
+public class CasIOUtils {
+
+  /**
+   * Loads a Cas from a URL source. 
+   * For SerialFormats ending with _TSI except for COMPRESSED_FILTERED_TSI, 
+   * the CAS's type system and indexes definition are replaced.
+   * CasLoadMode is DEFAULT.
+   * 
+   * @param casUrl
+   *          The url containing the CAS
+   * @param aCAS
+   *          The CAS that should be filled
+   * @return the SerialFormat of the loaded CAS
+   * @throws IOException
+   *           - Problem loading from given URL
+   */
+  public static SerialFormat load(URL casUrl, CAS aCAS) throws IOException {
+
+    return load(casUrl, null, aCAS, CasLoadMode.DEFAULT);
+  }
+
+  /**
+   * Loads a CAS from a URL source. The format is determined from the content.
+   * 
+   * If the value of tsiUrl is null it is ignored.
+   * 
+   * @param casUrl
+   *          The url to deserialize the CAS from
+   * @param tsiUrl
+   *          null or an optional url to deserialize the type system and index 
definitions from
+   * @param aCAS
+   *          The CAS that should be filled
+   * @param casLoadMode specifies how to handle reinitialization and lenient 
loading
+   *          see the Javadocs for CasLoadMode
+   * @return the SerialFormat of the loaded CAS
+   * @throws IOException Problem loading
+   */
+  public static SerialFormat load(URL casUrl, URL tsiUrl, CAS aCAS, 
CasLoadMode casLoadMode)
+          throws IOException {
+    InputStream casIS = new BufferedInputStream(casUrl.openStream());
+    InputStream tsIS = (tsiUrl == null) ? null : new 
BufferedInputStream(tsiUrl.openStream());
+    try {
+      return load(casIS, tsIS, aCAS, casLoadMode);
+    } finally {
+      closeQuitely(casIS);
+      closeQuitely(tsIS);
+    }  
+  }
+  
+  /**
+   * Loads a CAS from a URL source. The format is determined from the content.
+   * For SerialFormats ending with _TSI except for COMPRESSED_FILTERED_TSI, 
+   * the CAS's type system and indexes definition are replaced.
+   * CasLoadMode is set according to the leniently flag.
+   * 
+   * @param casUrl
+   *          The url to deserialize the CAS from
+   * @param tsiUrl
+   *          The optional url to deserialize the type system and index 
definitions from
+   * @param aCAS
+   *          The CAS that should be filled
+   * @param leniently true means do lenient loading
+   * @return the SerialFormat of the loaded CAS
+   * @throws IOException Problem loading
+   */
+  public static SerialFormat load(URL casUrl, URL tsiUrl, CAS aCAS, boolean leniently)
+      throws IOException {
+    // Translate the boolean flag into the matching CasLoadMode.
+    final CasLoadMode mode;
+    if (leniently) {
+      mode = CasLoadMode.LENIENT;
+    } else {
+      mode = CasLoadMode.DEFAULT;
+    }
+    return load(casUrl, tsiUrl, aCAS, mode);
+  }
+  
+  /**
+   * Loads a Cas from an Input Stream. The format is determined from the 
content.
+   * For SerialFormats ending with _TSI except for COMPRESSED_FILTERED_TSI, 
+   * the CAS's type system and indexes definition are replaced.
+   * CasLoadMode is DEFAULT.
+   * 
+   * @param casInputStream
+   *          The input stream containing the CAS.  Caller should buffer this 
appropriately.
+   * @param aCAS
+   *          The CAS that should be filled
+   * @return the SerialFormat of the loaded CAS
+   * @throws IOException
+   *           - Problem loading from given InputStream
+   */
+  public static SerialFormat load(InputStream casInputStream, CAS aCAS) throws IOException {
+    // No external type system / index definition stream; CasLoadMode is DEFAULT.
+    final InputStream noTsiStream = null;
+    return load(casInputStream, noTsiStream, aCAS, CasLoadMode.DEFAULT);
+  }
+
+  /**
+   * Loads a CAS from an Input Stream. The format is determined from the 
content.
+   * 
+   * For SerialFormats ending with _TSI the embedded value is used instead of 
any supplied external TSI information.
+   * TSI information is available either via embedded value, or if a non-null 
input is passed for tsiInputStream.
+   * 
+   * If TSI information is available, the CAS's type system and indexes 
definition are replaced,
+   * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and 
COMPRESSED_FILTERED_TSI.
+   *
+   * The CasLoadMode is DEFAULT.
+   * 
+   * @param casInputStream -
+   * @param tsiInputStream -
+   * @param aCAS -
+   * @return -
+   * @throws IOException -
+   */
+  public static SerialFormat load(InputStream casInputStream, InputStream 
tsiInputStream, CAS aCAS) throws IOException {
+    return load(casInputStream, tsiInputStream, aCAS, CasLoadMode.DEFAULT);
+  }
+
+  /**
+   * Loads a CAS from an Input Stream. The format is determined from the 
content.
+   * 
+   * For SerialFormats ending with _TSI the embedded value is used instead of 
any supplied external TSI information.
+   * TSI information is available either via embedded value, or if a non-null 
input is passed for tsiInputStream.
+   * 
+   * If TSI information is available, the CAS's type system and indexes 
definition are replaced,
+   * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and 
COMPRESSED_FILTERED_TSI.
+   *
+   * The CasLoadMode is set to LENIENT if the leniently flag is true; 
otherwise it is set to DEFAULT.
+   * 
+   * @param casInputStream -
+   * @param tsiInputStream -
+   * @param aCAS -
+   * @param leniently - 
+   * @return -
+   * @throws IOException -
+   */
+  public static SerialFormat load(InputStream casInputStream, InputStream 
tsiInputStream, CAS aCAS, boolean leniently) throws IOException {
+    return load(casInputStream, tsiInputStream, aCAS, leniently ? 
CasLoadMode.LENIENT : CasLoadMode.DEFAULT);
+  }
+
+  /**
+   * Loads a CAS from an Input Stream. The format is determined from the 
content.
+   * For formats ending in _TSI, such as SERIALIZED_TSI or COMPRESSED_FILTERED_TSI, 
+   * the type system and index definitions are read from the cas input source;
+   * the value of tsiInputStream is ignored.
+   * 
+   * For other formats, if the tsiInputStream is not null, 
+   * type system and index definitions are read from that source.
+   * 
+   * If TSI information is available, the CAS's type system and indexes 
definition are replaced,
+   * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and 
COMPRESSED_FILTERED_TSI.
+   * 
+   *   If the CasLoadMode == REINIT, then the TSI information is also used for 
these 3 formats to replace the CAS's definitions.
+   *   
+   * @param casInputStream
+   *          The input stream containing the CAS, appropriately buffered.
+   * @param tsiInputStream
+   *          The optional input stream containing the type system, 
appropriately buffered. 
+   *          This is only used if it is non null and 
+   *            -  the casInputStream does not already come with an embedded 
CAS Type System and Index Definition, or 
+   *            -  the serial format is COMPRESSED_FILTERED_TSI
+   * @param aCAS
+   *          The CAS that should be filled
+   * @param casLoadMode specifies loading alternative like lenient and reinit, 
see CasLoadMode.
+   * @return the SerialFormat of the loaded CAS
+   * @throws IOException
+   *           - Problem loading from given InputStream
+   */
+  public static SerialFormat load(InputStream casInputStream, InputStream tsiInputStream, CAS aCAS,
+          CasLoadMode casLoadMode) throws IOException {
+    // Hand over to the private worker without an explicit decoding type system.
+    final TypeSystemImpl noTypeSystem = null;
+    return load(casInputStream, tsiInputStream, aCAS, casLoadMode, noTypeSystem);
+  }
+
+  /**
+   * This load variant can be used for loading Form 6 compressed CASes where 
the 
+   * type system to use to deserialize is provided as an argument.  It can 
also load other formats,
+   * where its behavior is identical to load(casInputStream, aCas).
+   *
+   * Loads a CAS from an Input Stream. The format is determined from the 
content.
+   * For SerialFormats ending in _TSI, such as SERIALIZED_TSI or 
COMPRESSED_FILTERED_TSI, 
+   * the type system and index definitions are read from the cas input source;
+   * the value of typeSystem is ignored.
+   * 
+   * For COMPRESSED_FILTERED_xxx formats, if the typeSystem is not null, 
+   * the typeSystem is used for decoding.
+   * 
+   * If embedded TSI information is available, the CAS's type system and 
indexes definition are replaced,
+   * except for SerialFormats COMPRESSED_FILTERED, COMPRESSED_FILTERED_TS, and 
COMPRESSED_FILTERED_TSI.
+   * 
+   *   To replace the CAS's type system and indexes definition for these, use 
a load form which 
+   *   has the CasLoadMode argument, and set this to REINIT.
+   *     
+   * @param casInputStream
+   *          The input stream containing the CAS, appropriately buffered.
+   * @param aCAS
+   *          The CAS that should be filled
+   * @param typeSystem the type system to use for decoding the serialized 
form, must be non-null         
+   * @return the SerialFormat of the loaded CAS
+   * @throws IOException Problem loading from given InputStream   
+   */
+  public static SerialFormat load(InputStream casInputStream, CAS aCAS, 
TypeSystem typeSystem) throws IOException {
+    return load(casInputStream, null, aCAS, CasLoadMode.DEFAULT, 
(TypeSystemImpl) typeSystem);
+  }
+  
+  private static SerialFormat load(InputStream casInputStream, InputStream 
tsiInputStream, CAS aCAS,
+      CasLoadMode casLoadMode, TypeSystemImpl typeSystem) throws IOException {
+
+    if (!casInputStream.markSupported()) {
+      casInputStream = new BufferedInputStream(casInputStream);
+    }
+    
+    CASImpl casImpl = (CASImpl) aCAS;
+    BinaryCasSerDes bcsd = casImpl.getBinaryCasSerDes();
+
+    // scan the first part of the file for known formats
+    casInputStream.mark(6);
+    byte[] firstPartOfFile = new byte[6];
+    int bytesReadCount = casInputStream.read(firstPartOfFile);
+    casInputStream.reset();
+    String start = new String(firstPartOfFile, 0, bytesReadCount, 
"UTF-8").toLowerCase();
+
+    if (start.startsWith("<?xml ")) {  // could be XCAS or XMI
+      try {
+        bcsd.setupCasFromCasMgrSerializer(readCasManager(tsiInputStream));
+        // next call decides on XMI or XCAS via content
+        return XmlCasDeserializer.deserializeR(casInputStream, aCAS, 
casLoadMode == CasLoadMode.LENIENT);
+      } catch (SAXException e) {
+        throw new UIMARuntimeException(e);
+      }
+    }
+    
+    //  Not an XML file, decode as binary file
+    DataInputStream deserIn = 
CommonSerDes.maybeWrapToDataInputStream(casInputStream);
+    if (CommonSerDes.isBinaryHeader(deserIn)) {
+      
+      /*******************************************
+       * Binary, Compressed Binary (form 4 or 6)
+       ******************************************/
+      Header h = CommonSerDes.readHeader(deserIn);
+      return bcsd.reinit(h, casInputStream, readCasManager(tsiInputStream), 
casLoadMode, null, AllowPreexistingFS.allow, null);
+    
+    } else {
+      
+      /******************************
+       * Java Object loading
+       ******************************/
+      ObjectInputStream ois = new ObjectInputStream(casInputStream);
+      try {
+        Object o = ois.readObject();
+        if (o instanceof CASSerializer) {
+          bcsd.setupCasFromCasMgrSerializer(readCasManager(tsiInputStream));
+          bcsd.reinit((CASSerializer) o); // deserialize from object
+          return SerialFormat.SERIALIZED;
+        } else if (o instanceof CASCompleteSerializer) {
+          // with a type system use that, ignore any supplied via 
tsiInputStream
+          bcsd.reinit((CASCompleteSerializer) o);
+          return SerialFormat.SERIALIZED_TSI;
+        } else {
+          /**Unrecognized serialized CAS format*/
+          throw new 
CASRuntimeException(CASRuntimeException.UNRECOGNIZED_SERIALIZED_CAS_FORMAT);  
+        }
+      } catch (ClassNotFoundException e) {
+        /**Unrecognized serialized CAS format*/
+        throw new 
CASRuntimeException(CASRuntimeException.UNRECOGNIZED_SERIALIZED_CAS_FORMAT);
+      }       
+    }
+  }
+  
+  /**
+   * Write the CAS in the specified format.
+   * 
+   * @param aCas
+   *          The CAS that should be serialized and stored
+   * @param docOS
+   *          The output stream for the CAS
+   * @param format
+   *          The SerialFormat in which the CAS should be stored.
+   * @throws IOException
+   *           - Problem saving to the given OutputStream
+   */
+  public static void save(CAS aCas, OutputStream docOS, SerialFormat format) throws IOException {
+    // No separate output stream for type system information.
+    final OutputStream noTsiStream = null;
+    save(aCas, docOS, noTsiStream, format);
+  }
+
+  /**
+   * Write the CAS in the specified format. If the format does not include 
typesystem information
+   * and the optional output stream of the typesystem is specified, then the 
typesystem information
+   * is written there.
+   * 
+   * @param aCas
+   *          The CAS that should be serialized and stored
+   * @param docOS
+   *          The output stream for the CAS, with appropriate buffering
+   * @param tsiOS
+   *          Optional output stream for type system information. Only used if 
the format does not
+   *          support storing typesystem information directly in the main 
output file.
+   * @param format
+   *          The SerialFormat in which the CAS should be stored.
+   * @throws IOException
+   *           - Problem saving to the given OutputStream
+   */
+  public static void save(CAS aCas, OutputStream docOS, OutputStream tsiOS, SerialFormat format)
+          throws IOException {
+    // Set by every format that embeds type system information in docOS, so that the
+    // information is not written a second time to tsiOS below.
+    boolean typeSystemWritten = false;
+    try {
+      switch (format) {
+        case XMI:
+          XmiCasSerializer.serialize(aCas, docOS);
+          break;
+        case XCAS:
+          XCASSerializer.serialize(aCas, docOS, true); // true = formatted output
+          break;
+        case SERIALIZED:              // Java-serialized CAS without type system
+          writeJavaObject(Serialization.serializeCAS(aCas), docOS);
+          break;
+        case SERIALIZED_TSI:          // Java-serialized CAS with embedded type system
+          writeJavaObject(Serialization.serializeCASComplete((CASMgr) aCas), docOS);
+          typeSystemWritten = true; // Embedded type system
+          break;
+        case BINARY:                  // Binary CAS without type system
+          serializeCAS(aCas, docOS);
+          break;
+        case BINARY_TSI:              // Binary CAS with embedded type system
+          CASSerializer ser = new CASSerializer();
+          ser.addCAS((CASImpl) aCas, docOS, true);
+          typeSystemWritten = true; // Embedded type system
+          break;
+        case COMPRESSED:              // Binary compressed CAS without type system (form 4)
+          serializeWithCompression(aCas, docOS);
+          break;
+        case COMPRESSED_TSI:          // Binary compressed CAS with embedded type system (form 4)
+          new BinaryCasSerDes4((TypeSystemImpl) aCas.getTypeSystem(), false)
+              .serializeWithTsi((CASImpl) aCas, docOS);
+          typeSystemWritten = true; // Embedded type system
+          break;
+        case COMPRESSED_FILTERED:     // Binary compressed CAS (form 6)
+          serializeWithCompression(aCas, docOS, false, false);
+          break;
+        case COMPRESSED_FILTERED_TS:
+          serializeWithCompression(aCas, docOS, true, false);
+          typeSystemWritten = true; // Embedded type system
+          break;
+        case COMPRESSED_FILTERED_TSI:
+          serializeWithCompression(aCas, docOS, false, true);
+          typeSystemWritten = true; // Embedded type system
+          break;
+        default:
+          // Only reachable if a SerialFormat value is not handled above.
+          StringBuilder sb = new StringBuilder();
+          for (SerialFormat sf : SerialFormat.values()) {
+            sb.append(sf.toString()).append(", ");
+          }
+          throw new IllegalArgumentException("Unknown format [" + format.name()
+                  + "]. Must be one of: " + sb.toString());
+      }
+    } catch (IOException e) {
+      throw e;
+    } catch (Exception e) {
+      // Any other failure is reported to the caller as an IOException, keeping the cause.
+      throw new IOException(e);
+    }
+
+    // Write type system to the separate stream only if it has not already been
+    // embedded into the main stream
+    if (tsiOS != null && !typeSystemWritten) {
+      writeTypeSystem(aCas, tsiOS, true);
+    }
+  }
+
+  private static CASMgrSerializer readCasManager(InputStream tsiInputStream) throws IOException {
+    // Nothing to read when no type system / index definition stream was supplied.
+    if (tsiInputStream == null) {
+      return null;
+    }
+    ObjectInputStream objectIn = new ObjectInputStream(tsiInputStream);
+    try {
+      return (CASMgrSerializer) objectIn.readObject();
+    } catch (ClassNotFoundException e) {
+      // Surface a missing class as an I/O problem, keeping the cause.
+      throw new IOException(e);
+    }
+  }
+    
+  private static void writeJavaObject(Object o, OutputStream aOS) throws IOException {
+    // Java-serialize the object onto the stream and flush; the stream is left open.
+    ObjectOutputStream objectOut = new ObjectOutputStream(aOS);
+    objectOut.writeObject(o);
+    objectOut.flush();
+  }
+  
+  public static void writeTypeSystem(CAS aCas, OutputStream aOS, boolean 
includeIndexDefs) throws IOException {
+    writeJavaObject(includeIndexDefs 
+                        ? Serialization.serializeCASMgr((CASImpl) aCas)
+                        : 
Serialization.serializeCASMgrTypeSystemOnly((CASImpl) aCas)
+                      , aOS);
+  }
+  
+  private static void closeQuitely(Closeable closeable) {
+    // Null is tolerated so callers can pass streams that were never opened.
+    if (closeable == null) {
+      return;
+    }
+    try {
+      closeable.close();
+    } catch (IOException ignored) {
+      // best-effort close: failures are deliberately not reported
+    }
+  }
+  
+}

Added: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java?rev=1758045&view=auto
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java
 (added)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/CasLoadMode.java
 Sat Aug 27 14:29:59 2016
@@ -0,0 +1,98 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ * 
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ * 
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.uima.util;
+
+/**
+ * Used with CasIOUtils, maybe elsewhere, to indicate how CASes are to be 
loaded or saved.
+ * 
+ * TSI = serialized type system and index definitions
+ * TS = serialized type system
+ *
+ * TSI can be used to reinitialize the CAS's type system and its index 
definitions.
+ * TS (which can be obtained from TSI) is used only with Compressed form 6 
+ *   to specify the type system used to decode the serialized data.
+ *   
+ * The TS/TSI artifact is self-identifying as to which kind it is, when 
deserializing.
+ * 
+ * TSI and TS can be provided via two sources:
+ *   - embedded in some serialized forms
+ *   - via a separate artifact 
+ * 
+ * If both embedded and separate values are available for TS or TSI, then 
embedded takes precedence, external is ignored,
+ *   except for compressed form 6; in that case, both are used: 
+ *     - external used to reinitialize the CAS's type system and indexes 
definition, and
+ *     - embedded used to decode the serialized data, leniently.
+ *
+ * Compressed form 6 type system for decoding comes from the first one 
available of: 
+ *   - embedded TS or TSI
+ *   - external TS or TSI
+ *   - the receiving CAS's type system 
+ */
+public enum CasLoadMode {
+
+  /**
+   * Default operation:
+   *
+   * If TSI is available,
+   *   reinitialize the CAS's type system and its indexes definition, except for
+   *   Compressed Form 6, using the first TSI in this list:
+   *       - embedded
+   *       - external
+   *     (to do this for Compressed Form 6, specify REINIT)
+   *     Logic for doing embedded before external:
+   *       Examining each serialized form:
+   *         Java Object:  if embedded is available, it's the right one; a different
+   *                       one causes exceptions
+   *         XCas, XMI:  doesn't apply - no way to have embedded
+   *         Form 6 - excluded, anyway, see below
+   *         Form 4 and Binary: these require that the serialized type system match
+   *                       the CAS's, so the embedded one is always right.
+   *
+   * Compressed Form 6:
+   *   - decoding: use the first type system in this list:
+   *     - embedded TS/TSI
+   *     - external TS/TSI
+   *     - the receiving CAS's type system
+   *
+   * For all SerialFormats except Compressed Form 6, the default is to require
+   * strict matching (not lenient).
+   */
+  DEFAULT,
+  
+  /**
+   * Same as DEFAULT, except for XMI and XCAS formats:
+   *   Specifies lenient loading for those formats, which means that the
+   *   load will not indicate an error if the incoming data has types and/or
+   *   features not in the receiving CAS, but will instead silently ignore these.
+   */
+  LENIENT,  
+  
+  /**
+   * Used mainly for Compressed Form 6, for which DEFAULT does not reinitialize
+   * the CAS's type system and its indexes definition.
+   *
+   * Same as DEFAULT, except that the internal and / or external TSI is used to
+   *   reinitialize the CAS's type system and its indexes definition,
+   *   using the first TSI in this list:
+   *     - external (to allow the embedded to specify the decoding type system)
+   *     - embedded (if it is a TSI)
+   *
+   * Decoding is the same as for DEFAULT.
+   *
+   * It is an error if no TSI information is available.
+   */
+  REINIT,  
+  ;
+}

Modified: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
URL: 
http://svn.apache.org/viewvc/uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java?rev=1758045&r1=1758044&r2=1758045&view=diff
==============================================================================
--- 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
 (original)
+++ 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
 Sat Aug 27 14:29:59 2016
@@ -23,6 +23,7 @@ import java.io.IOException;
 import java.io.InputStream;
 
 import org.apache.uima.cas.CAS;
+import org.apache.uima.cas.SerialFormat;
 import org.apache.uima.cas.impl.OutOfTypeSystemData;
 import org.apache.uima.cas.impl.XCASDeserializer;
 import org.apache.uima.cas.impl.XmiCasDeserializer;
@@ -59,7 +60,7 @@ public abstract class XmlCasDeserializer
   }
 
   /**
-   * Deserializes a CAS from XMI.
+   * Deserializes a CAS from XMI or XCAS.
    * 
    * @param aStream
    *          input stream from which to read the XML document
@@ -78,11 +79,40 @@ public abstract class XmlCasDeserializer
   public static void deserialize(InputStream aStream, CAS aCAS, boolean 
aLenient)
           throws SAXException, IOException {
     XMLReader xmlReader = XMLReaderFactory.createXMLReader();
-    ContentHandler handler = new XmlCasDeserializerHandler(aCAS, aLenient);
+    XmlCasDeserializerHandler handler = new XmlCasDeserializerHandler(aCAS, 
aLenient);
     xmlReader.setContentHandler(handler);
     xmlReader.parse(new InputSource(aStream));
   }
 
+  /**
+   * Deserializes a CAS from XMI or XCAS, version returning the SerialFormat
+   * 
+   * @param aStream
+   *          input stream from which to read the XML document
+   * @param aCAS
+   *          CAS into which to deserialize. This CAS must be set up with a 
type system that is
+   *          compatible with that in the XML
+   * @param aLenient
+   *          if true, unknown Types will be ignored. If false, unknown Types 
will cause an
+   *          exception. The default is false.
+   * @return the format of the data  
+   * 
+   * @throws SAXException
+   *           if an XML Parsing error occurs
+   * @throws IOException
+   *           if an I/O failure occurs
+   */
+  static SerialFormat deserializeR(InputStream aStream, CAS aCAS, boolean aLenient)
+      throws SAXException, IOException {
+    // Parse the document with the CAS handler installed as the content handler.
+    XMLReader reader = XMLReaderFactory.createXMLReader();
+    XmlCasDeserializerHandler casHandler = new XmlCasDeserializerHandler(aCAS, aLenient);
+    reader.setContentHandler(casHandler);
+    reader.parse(new InputSource(aStream));
+
+    // The type of the delegate handler after parsing tells which format was read.
+    if (casHandler.mDelegateHandler instanceof XmiCasDeserializer.XmiCasDeserializerHandler) {
+      return SerialFormat.XMI;
+    }
+    return SerialFormat.XCAS;
+  }
+
   static class XmlCasDeserializerHandler extends DefaultHandler {
     private CAS mCAS;
 

Propchange: 
uima/uimaj/branches/experiment-v3-jcas/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java
------------------------------------------------------------------------------
--- svn:mergeinfo (added)
+++ svn:mergeinfo Sat Aug 27 14:29:59 2016
@@ -0,0 +1,8 @@
+/incubator/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:932400-933272
+/uima/uimaj/branches/2.6.0-json/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:1616936-1617592
+/uima/uimaj/branches/depend-on-july-9-build-tools/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:963167-964468
+/uima/uimaj/branches/depend-on-parent-pom-4/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:961329-961745
+/uima/uimaj/branches/filteredCompress-uima-2498/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:1436573-1462257
+/uima/uimaj/branches/mavenAlign/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:933273-944396
+/uima/uimaj/branches/test-parent-pom-6/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:1024030
+/uima/uimaj/trunk/uimaj-core/src/main/java/org/apache/uima/util/XmlCasDeserializer.java:1690273-1757900


Reply via email to