Author: kiwiwings
Date: Wed Jan  1 22:44:42 2020
New Revision: 1872223

URL: http://svn.apache.org/viewvc?rev=1872223&view=rev
Log:
Fix Visio compression

Modified:
    poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
    poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java
    poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java

Modified: poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java?rev=1872223&r1=1872222&r2=1872223&view=diff
==============================================================================
--- poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java (original)
+++ poi/trunk/src/java/org/apache/poi/util/LZWDecompresser.java Wed Jan  1 
22:44:42 2020
@@ -23,184 +23,167 @@ import java.io.OutputStream;
 
 /**
  * This class provides common functionality for the
- *  various LZW implementations in the different file
- *  formats.
+ * various LZW implementations in the different file
+ * formats.
  * It's currently used by HDGF and HMEF.
- *
+ * <p>
  * Two good resources on LZW are:
- *  http://en.wikipedia.org/wiki/LZW
- *  http://marknelson.us/1989/10/01/lzw-data-compression/
+ * http://en.wikipedia.org/wiki/LZW
+ * http://marknelson.us/1989/10/01/lzw-data-compression/
  */
 public abstract class LZWDecompresser {
 
-   //arbitrarily selected; may need to increase
-   private static final int MAX_RECORD_LENGTH = 1_000_000;
-
-   /**
-    * Does the mask bit mean it's compressed or uncompressed?
-    */
-   private final boolean maskMeansCompressed;
-   /**
-    * How much to append to the code length in the stream
-    *  to get the real code length? Normally 2 or 3
-    */
-   private final int codeLengthIncrease;
-   /**
-    * Does the 12 bits of the position get stored in
-    *  Little Endian or Big Endian form?
-    * This controls whether a pos+length of 0x12 0x34
-    *  becomes a position of 0x123 or 0x312
-    */
-   private final boolean positionIsBigEndian;
-   
-   protected LZWDecompresser(boolean maskMeansCompressed, 
-            int codeLengthIncrease, boolean positionIsBigEndian) {
-      this.maskMeansCompressed = maskMeansCompressed;
-      this.codeLengthIncrease = codeLengthIncrease;
-      this.positionIsBigEndian = positionIsBigEndian;
-   }
-   
-   /**
-    * Populates the dictionary, and returns where in it
-    *  to begin writing new codes.
-    * Generally, if the dictionary is pre-populated, then new
-    *  codes should be placed at the end of that block.
-    * Equally, if the dictionary is left with all zeros, then
-    *  usually the new codes can go in at the start.
-    */
-   protected abstract int populateDictionary(byte[] dict);
-   
-   /**
-    * Adjusts the position offset if needed when looking
-    *  something up in the dictionary.
-    */
-   protected abstract int adjustDictionaryOffset(int offset);
-   
-   /**
-    * Decompresses the given input stream, returning the array of bytes
-    *  of the decompressed input.
-    */
-   public byte[] decompress(InputStream src) throws IOException {
-      ByteArrayOutputStream res = new ByteArrayOutputStream();
-      decompress(src,res);
-      return res.toByteArray();
-   }
-   
-   /**
-    * Perform a streaming decompression of the input.
-    * Works by:
-    * 1) Reading a flag byte, the 8 bits of which tell you if the
-    *     following 8 codes are compressed our un-compressed
-    * 2) Consider the 8 bits in turn
-    * 3) If the bit is set, the next code is un-compressed, so
-    *     add it to the dictionary and output it
-    * 4) If the bit isn't set, then read in the length and start
-    *     position in the dictionary, and output the bytes there
-    * 5) Loop until we've done all 8 bits, then read in the next
-    *     flag byte
-    */
-   public void decompress(InputStream src, OutputStream res) throws 
IOException {
-      // How far through the output we've got
-      // (This is normally used &4095, so it nicely wraps)
-      // The initial value is set when populating the dictionary
-      int pos;
-      // The flag byte is treated as its 8 individual
-      //  bits, which tell us if the following 8 codes
-      //  are compressed or un-compressed
-      int flag;
-      // The mask, between 1 and 255, which is used when
-      //  processing each bit of the flag byte in turn
-      int mask;
-
-      // We use 12 bit codes:
-      // * 0-255 are real bytes
-      // * 256-4095 are the substring codes
-      // Java handily initialises our buffer / dictionary
-      //  to all zeros
-      byte[] buffer = new byte[4096];
-      pos = populateDictionary(buffer);
-
-      // These are bytes as looked up in the dictionary
-      // It needs to be signed, as it'll get passed on to
-      //  the output stream
-      byte[] dataB = IOUtils.safelyAllocate(16+codeLengthIncrease, 
MAX_RECORD_LENGTH);
-      // This is an unsigned byte read from the stream
-      // It needs to be unsigned, so that bit stuff works
-      int dataI;
-      // The compressed code sequence is held over 2 bytes
-      int dataIPt1, dataIPt2;
-      // How long a code sequence is, and where in the
-      //  dictionary to start at
-      int len, pntr;
-
-      while( (flag = src.read()) != -1 ) {
-         // Compare each bit in our flag byte in turn:
-         for(mask = 1; mask < 256 ; mask <<= 1) {
-            // Is this a new code (un-compressed), or
-            //  the use of existing codes (compressed)?
-            boolean isMaskSet = (flag & mask) > 0;
-            if( isMaskSet ^ maskMeansCompressed ) {
-               // Retrieve the un-compressed code
-               if( (dataI = src.read()) != -1) {
-                  // Save the byte into the dictionary
-                  buffer[(pos&4095)] = fromInt(dataI);
-                  pos++;
-                  // And output the byte
-                  res.write( new byte[] {fromInt(dataI)} );
-               }
-            } else {
-               // We have a compressed sequence
-               // Grab the next 16 bits of data
-               dataIPt1 = src.read();
-               dataIPt2 = src.read();
-               if(dataIPt1 == -1 || dataIPt2 == -1) break;
-
-               // Build up how long the code sequence is, and
-               //  what position of the code to start at
-               // (The position is the usually the first 12 bits, 
-               //  and the length is usually the last 4 bits)
-               len = (dataIPt2 & 15) + codeLengthIncrease;
-               if(positionIsBigEndian) {
-                  pntr = (dataIPt1<<4) + (dataIPt2>>4);
-               } else {
-                  pntr = dataIPt1 + ((dataIPt2&0xF0)<<4);
-               }
-               
-               // Adjust the pointer as needed
-               pntr = adjustDictionaryOffset(pntr);
-
-               // Loop over the codes, outputting what they correspond to
-               for(int i=0; i<len; i++) {
-                  dataB[i] = buffer[(pntr + i) & 4095];
-                  buffer[ (pos + i) & 4095 ] = dataB[i];
-               }
-               res.write(dataB, 0, len);
-
-               // Record how far along the stream we have moved
-               pos = pos + len;
+    /** the size of our dictionary */
+    public static final int DICT_SIZE = 0x1000;
+    /** the mask for calculating / wrapping dictionary offsets */
+    public static final int DICT_MASK = 0xFFF;
+
+    //arbitrarily selected; may need to increase
+    private static final int MAX_RECORD_LENGTH = 1_000_000;
+
+    /**
+     * Does the mask bit mean it's compressed or uncompressed?
+     */
+    private final boolean maskMeansCompressed;
+    /**
+     * How much to append to the code length in the stream
+     * to get the real code length? Normally 2 or 3
+     */
+    private final int codeLengthIncrease;
+    /**
+     * Does the 12 bits of the position get stored in
+     * Little Endian or Big Endian form?
+     * This controls whether a pos+length of 0x12 0x34
+     * becomes a position of 0x123 or 0x312
+     */
+    private final boolean positionIsBigEndian;
+
+    protected LZWDecompresser(boolean maskMeansCompressed,
+                              int codeLengthIncrease, boolean 
positionIsBigEndian) {
+        this.maskMeansCompressed = maskMeansCompressed;
+        this.codeLengthIncrease = codeLengthIncrease;
+        this.positionIsBigEndian = positionIsBigEndian;
+    }
+
+    /**
+     * Populates the dictionary, and returns where in it
+     * to begin writing new codes.
+     * Generally, if the dictionary is pre-populated, then new
+     * codes should be placed at the end of that block.
+     * Equally, if the dictionary is left with all zeros, then
+     * usually the new codes can go in at the start.
+     */
+    protected abstract int populateDictionary(byte[] dict);
+
+    /**
+     * Adjusts the position offset if needed when looking
+     * something up in the dictionary.
+     */
+    protected abstract int adjustDictionaryOffset(int offset);
+
+    /**
+     * Decompresses the given input stream, returning the array of bytes
+     * of the decompressed input.
+     */
+    public byte[] decompress(InputStream src) throws IOException {
+        ByteArrayOutputStream res = new ByteArrayOutputStream();
+        decompress(src, res);
+        return res.toByteArray();
+    }
+
+    /**
+     * Perform a streaming decompression of the input.
+     * Works by:
+     * 1) Reading a flag byte, the 8 bits of which tell you if the
+     * following 8 codes are compressed or un-compressed
+     * 2) Consider the 8 bits in turn
+     * 3) If the bit is set, the next code is un-compressed, so
+     * add it to the dictionary and output it
+     * 4) If the bit isn't set, then read in the length and start
+     * position in the dictionary, and output the bytes there
+     * 5) Loop until we've done all 8 bits, then read in the next
+     * flag byte
+     */
+    public void decompress(InputStream src, OutputStream res) throws 
IOException {
+        // How far through the output we've got
+        // (This is normally used &4095, so it nicely wraps)
+        // The initial value is set when populating the dictionary
+        int pos;
+        // The flag byte is treated as its 8 individual
+        //  bits, which tell us if the following 8 codes
+        //  are compressed or un-compressed
+        int flag;
+        // The mask, between 1 and 255, which is used when
+        //  processing each bit of the flag byte in turn
+        int mask;
+
+        // We use 12 bit codes:
+        // * 0-255 are real bytes
+        // * 256-4095 are the substring codes
+        // Java handily initialises our buffer / dictionary
+        //  to all zeros
+        final byte[] buffer = new byte[DICT_SIZE];
+        pos = populateDictionary(buffer);
+
+        // These are bytes as looked up in the dictionary
+        // It needs to be signed, as it'll get passed on to
+        //  the output stream
+        final byte[] dataB = IOUtils.safelyAllocate(16 + codeLengthIncrease, 
MAX_RECORD_LENGTH);
+        // This is an unsigned byte read from the stream
+        // It needs to be unsigned, so that bit stuff works
+        int dataI;
+        // The compressed code sequence is held over 2 bytes
+        int dataIPt1, dataIPt2;
+        // How long a code sequence is, and where in the
+        //  dictionary to start at
+        int len, pntr;
+
+        while ((flag = src.read()) != -1) {
+            // Compare each bit in our flag byte in turn:
+            for (mask = 1; mask < 0x100; mask <<= 1) {
+                // Is this a new code (un-compressed), or
+                //  the use of existing codes (compressed)?
+                boolean isMaskSet = (flag & mask) > 0;
+                if (isMaskSet ^ maskMeansCompressed) {
+                    // Retrieve the un-compressed code
+                    if ((dataI = src.read()) != -1) {
+                        // Save the byte into the dictionary
+                        buffer[pos++ & DICT_MASK] = (byte) dataI;
+                        // And output the byte
+                        res.write(dataI);
+                    }
+                } else {
+                    // We have a compressed sequence
+                    // Grab the next 16 bits of data
+                    dataIPt1 = src.read();
+                    dataIPt2 = src.read();
+                    if (dataIPt1 == -1 || dataIPt2 == -1) break;
+
+                    // Build up how long the code sequence is, and
+                    //  what position of the code to start at
+                    // (The position is usually the first 12 bits,
+                    //  and the length is usually the last 4 bits)
+                    len = (dataIPt2 & 0x0F) + codeLengthIncrease;
+                    if (positionIsBigEndian) {
+                        pntr = (dataIPt1 << 4) + (dataIPt2 >>> 4);
+                    } else {
+                        pntr = dataIPt1 + ((dataIPt2 & 0xF0) << 4);
+                    }
+
+                    // Adjust the pointer as needed
+                    pntr = adjustDictionaryOffset(pntr);
+
+                    // Loop over the codes, outputting what they correspond to
+                    for (int i = 0; i < len; i++) {
+                        dataB[i] = buffer[(pntr + i) & DICT_MASK];
+                        buffer[(pos + i) & DICT_MASK] = dataB[i];
+                    }
+                    res.write(dataB, 0, len);
+
+                    // Record how far along the stream we have moved
+                    pos += len;
+                }
             }
-         }
-      }
-   }
-
-   /**
-    * Given an integer, turn it into a java byte, handling
-    *  the wrapping.
-    * This is a convenience method
-    */
-   public static byte fromInt(int b) {
-      if(b < 128) return (byte)b;
-      return (byte)(b - 256);
-   }
-   /**
-    * Given a java byte, turn it into an integer between 0
-    *  and 255 (i.e. handle the unwrapping).
-    * This is a convenience method
-    */
-   public static int fromByte(byte b) {
-      if(b >= 0) {
-         return b;
-      }
-      return b + 256;
-   }
+        }
+    }
 }

Modified: poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java?rev=1872223&r1=1872222&r2=1872223&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java (original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZW.java Wed Jan  1 
22:44:42 2020
@@ -70,7 +70,7 @@ public class HDGFLZW extends LZWDecompre
       }
       return pntr;
    }
-   
+
    /**
     * We want an empty dictionary, so do nothing
     */
@@ -89,7 +89,7 @@ public class HDGFLZW extends LZWDecompre
     *    or the OutputStream can't be written to
     */
    public void compress(InputStream src, OutputStream res) throws IOException {
-      HDGFLZWCompressor c = new HDGFLZWCompressor();
-      c.compress(src, res);
+      HDGFLZWCompressor c = new HDGFLZWCompressor(res);
+      c.compress(src);
    }
 }

Modified: 
poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java?rev=1872223&r1=1872222&r2=1872223&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java 
(original)
+++ poi/trunk/src/scratchpad/src/org/apache/poi/hdgf/HDGFLZWCompressor.java Wed 
Jan  1 22:44:42 2020
@@ -17,242 +17,227 @@
 
 package org.apache.poi.hdgf;
 
+import static org.apache.poi.util.LZWDecompresser.DICT_MASK;
+import static org.apache.poi.util.LZWDecompresser.DICT_SIZE;
+
 import java.io.IOException;
 import java.io.InputStream;
 import java.io.OutputStream;
 
 /**
- * Helper class to handle the Visio compatible
- *  streaming LZW compression.
- * Need our own class to handle keeping track of the
- *  code buffer, pending bytes to write out etc.
- *  
- * TODO Fix this, as it starts to go wrong on
- *  large streams 
+ * Helper class to handle the Visio compatible streaming LZW compression.
+ * Need our own class to handle keeping track of the code buffer, pending bytes to write out etc.
+ * <p>
+ * TODO Fix this, as it starts to go wrong on large streams
  */
 /* package */ final class HDGFLZWCompressor {
-       // We use 12 bit codes:
-       // * 0-255 are real bytes
-       // * 256-4095 are the substring codes
-       // Java handily initialises our buffer / dictionary
-       //  to all zeros
-       private byte[] dict = new byte[4096];
-
-       // The next block of data to be written out, minus
-       //  its mask byte
-       private byte[] buffer = new byte[16];
-       // And how long it is
-       // (Un-compressed codes are 1 byte each, compressed codes
-       //   are two)
-       private int bufferLen;
-
-       // The raw length of a code is limited to 4 bits + 2
-       private byte[] rawCode = new byte[18];
-       // And how much we're using
-       private int rawCodeLen;
-
-       // How far through the input and output streams we are
-       private int posInp;
-       private int posOut;
-
-       // What the next mask byte to output will be
-       private int nextMask;
-       // And how many bits we've already set
-       private int maskBitsSet;
-
-       public HDGFLZWCompressor() {}
-       
-/**
- * Returns the last place that the bytes from rawCode are found
- *  at in the buffer, or -1 if they can't be found
- */
-private int findRawCodeInBuffer() {
-       // Work our way through all the codes until we
-   //  find the right one. Visio starts from the end
-       for(int i=4096-rawCodeLen; i>0; i--) {
-               boolean matches = true;
-               for(int j=0; matches && j<rawCodeLen; j++) {
-                       if(dict[i+j] == rawCode[j]) {
-                               // Fits
-                       } else {
-                               // Doesn't fit, can't be a match
-                               matches = false;
-                       }
-               }
-
-               // Was this position a match?
-               if(matches) {
-                       return i;
-               }
-       }
-
-       // Not found
-       return -1;
-}
-
-/**
- * Output the compressed representation for the bytes
- *  found in rawCode
- */
-private void outputCompressed(OutputStream res) throws IOException {
-       // It's not worth compressing only 1 or two bytes,
-       //  due to the overheads
-       // So if asked, just output uncompressed
-       if(rawCodeLen < 3) {
-               for(int i=0; i<rawCodeLen; i++) {
-                       outputUncompressed(rawCode[i], res);
-               }
-               return;
-       }
-       
-       // Grab where the data lives
-       int codesAt = findRawCodeInBuffer();
-   codesAt -= 18;
-       if(codesAt < 0) {
-          codesAt += 4096;
-       }
-
-       // Increment the mask bit count, we've done another code
-       maskBitsSet++;
-       
-       // Add the length+code to the buffer
-       // (The position is the first 12 bits, the
-       //  length is the last 4 bits)
-       int bp1 = (codesAt & 255);
-       int bp2 = (rawCodeLen-3) + ((codesAt-bp1) >> 4);
-       buffer[bufferLen] = HDGFLZW.fromInt(bp1);
-       bufferLen++;
-   buffer[bufferLen] = HDGFLZW.fromInt(bp2);
-   bufferLen++;
-   
-   // Copy the data to the dictionary in the new place
-   for(int i=0; i<rawCodeLen; i++) {
-      dict[(posOut&4095)] = rawCode[i];
-      posOut++; 
-   }
-
-       // If we're now at 8 codes, output
-       if(maskBitsSet == 8) {
-               output8Codes(res);
-       }
-}
-/**
- * Output the un-compressed byte
- */
-private void outputUncompressed(byte b, OutputStream res) throws IOException {
-       // Set the mask bit for us
-       nextMask += (1<<maskBitsSet);
-       maskBitsSet++;
-
-       // And add us to the buffer + dictionary
-       buffer[bufferLen] = b;
-       bufferLen++;
-       dict[(posOut&4095)] = b;
-       posOut++;
-
-       // If we're now at 8 codes, output
-       if(maskBitsSet == 8) {
-               output8Codes(res);
-       }
-}
-
-/**
- * We've got 8 code worth to write out, so
- *  output along with the header
- */
-private void output8Codes(OutputStream res) throws IOException {
-       // Output the mask and the data
-       res.write(new byte[] { HDGFLZW.fromInt(nextMask) } );
-       res.write(buffer, 0, bufferLen);
-
-       // Reset things
-       nextMask = 0;
-       maskBitsSet = 0;
-       bufferLen = 0;
-}
-
-/**
- * Does the compression
- */
-public void compress(InputStream src, OutputStream res) throws IOException {
-       // Have we hit the end of the file yet?
-       boolean going = true;
-
-       // This is a byte as looked up in the dictionary
-       // It needs to be signed, as it'll get passed on to
-       //  the output stream
-       byte dataB;
-       // This is an unsigned byte read from the stream
-       // It needs to be unsigned, so that bit stuff works
-       int dataI;
-
-       while( going ) {
-               dataI = src.read();
-               posInp++;
-               if(dataI == -1) { going = false; }
-               dataB = HDGFLZW.fromInt(dataI);
-
-               // If we've run out of data, output anything that's
-               //  pending then finish
-               if(!going) {
-                  if(rawCodeLen > 0) {
-                outputCompressed(res);
-                if(maskBitsSet > 0) {
-                   output8Codes(res);
-                }
-                  }
-                       break;
-               }
-
-               // Try adding this new byte onto rawCode, and
-               //  see if all of that is still found in the
-               //  buffer dictionary or not
-               rawCode[rawCodeLen] = dataB;
-               rawCodeLen++;
-               int rawAt = findRawCodeInBuffer();
-               
-               // If we found it and are now at 18 bytes,
-               //  we need to output our pending code block
-               if(rawCodeLen == 18 && rawAt > -1) {
-                       outputCompressed(res);
-                       rawCodeLen = 0;
-                       continue;
-               }
-
-               // If we did find all of rawCode with our new
-               //  byte added on, we can wait to see what happens
-               //  with the next byte
-               if(rawAt > -1) {
-                       continue;
-               }
-
-               // If we get here, then the rawCode + this byte weren't
-               // found in the dictionary
-
-               // If there was something in rawCode before, then that was
-               // found in the dictionary, so output that compressed
-               rawCodeLen--;
-               if(rawCodeLen > 0) {
-                       // Output the old rawCode
-                       outputCompressed(res);
-
-                       // Can this byte start a new rawCode, or does
-                       //  it need outputting itself?
-                       rawCode[0] = dataB;
-                       rawCodeLen = 1;
-                       if(findRawCodeInBuffer() > -1) {
-                               // Fits in, wait for next byte
-                               continue;
-                       }
-                       // Doesn't fit, output
-                       outputUncompressed(dataB,res);
-                       rawCodeLen = 0;
-               } else {
-                       // Nothing in rawCode before, so this byte
-                       //  isn't in the buffer dictionary
-                       // Output it un-compressed
-                       outputUncompressed(dataB,res);
-               }
-       }
-}
+    // We use 12 bit codes:
+    // * 0-255 are real bytes
+    // * 256-4095 are the substring codes
+    // Java handily initialises our buffer / dictionary
+    //  to all zeros
+    private final byte[] dict = new byte[DICT_SIZE];
+
+    // The next block of data to be written out, minus its mask byte
+    private final byte[] buffer = new byte[16];
+    // And how long it is
+    // (Un-compressed codes are 1 byte each, compressed codes are two)
+    private int bufferLen;
+
+    // The raw length of a code is limited to 18 bytes (4-bit length field, max 15, plus 3)
+    private final byte[] rawCode = new byte[18];
+    // And how much we're using
+    private int rawCodeLen;
+
+    // How far through the input and output streams we are
+    private int posInp;
+    private int posOut;
+
+    // What the next mask byte to output will be
+    private int nextMask;
+    // And how many bits we've already set
+    private int maskBitsSet;
+
+    private final OutputStream res;
+
+    public HDGFLZWCompressor(OutputStream res) {
+        this.res = res;
+    }
+
+    /**
+     * Returns the last place that the bytes from rawCode are found
+     * at in the buffer, or -1 if they can't be found
+     */
+    private int findRawCodeInBuffer() {
+        // Work our way through all the codes until we
+        //  find the right one. Visio starts from the end
+        for (int i = rawCodeLen+1; i < DICT_SIZE; i++) {
+            int pos = (posInp - i) & DICT_MASK;
+            // in the example data it seems that the compressor doesn't like to wrap beyond DICT_SIZE
+            // if (pos + rawCodeLen > DICT_SIZE) continue;
+            boolean matches = true;
+            for (int j = 0; j < rawCodeLen; j++) {
+                if (dict[(pos + j) & DICT_MASK] != rawCode[j]) {
+                    // Doesn't fit, can't be a match
+                    matches = false;
+                    break;
+                }
+            }
+
+            // Was this position a match?
+            if (matches) {
+                return pos;
+            }
+        }
+
+        // Not found
+        return -1;
+    }
+
+    /**
+     * Output the compressed representation for the bytes
+     * found in rawCode
+     */
+    private void outputCompressed() throws IOException {
+        // It's not worth compressing only 1 or two bytes, due to the overheads
+        // So if asked, just output uncompressed
+        if (rawCodeLen < 3) {
+            final int rcl = rawCodeLen;
+            for (int i = 0; i < rcl; i++) {
+                outputUncompressed(rawCode[i]);
+            }
+            return;
+        }
+
+        // Grab where the data lives
+        int codesAt = findRawCodeInBuffer();
+        codesAt = (codesAt-18) & DICT_MASK;
+
+        // Increment the mask bit count, we've done another code
+        maskBitsSet++;
+
+        // Add the length+code to the buffer
+        // (The position is the first 12 bits, the length is the last 4 bits)
+        int bp1 = (codesAt & 0xFF);
+        int bp2 = (rawCodeLen - 3) + ((codesAt - bp1) >>> 4);
+        buffer[bufferLen++] = (byte) bp1;
+        buffer[bufferLen++] = (byte) bp2;
+
+        assert(maskBitsSet <= 8);
+
+        // If we're now at 8 codes, output
+        if (maskBitsSet == 8) {
+            output8Codes();
+        }
+
+        rawCodeLen = 0;
+    }
+
+    /**
+     * Output the un-compressed byte
+     */
+    private void outputUncompressed(byte b) throws IOException {
+        // Set the mask bit for us
+        nextMask += (1 << maskBitsSet);
+        maskBitsSet++;
+
+        // And add us to the buffer (the dictionary is filled by compress() as bytes are read)
+        buffer[bufferLen++] = b;
+
+        // If we're now at 8 codes, output
+        if (maskBitsSet == 8) {
+            output8Codes();
+        }
+
+        rawCodeLen = 0;
+    }
+
+    /**
+     * We've got 8 code worth to write out, so
+     * output along with the header
+     */
+    private void output8Codes() throws IOException {
+        // Output the mask and the data
+        res.write(nextMask);
+        res.write(buffer, 0, bufferLen);
+        posOut += 1 + bufferLen;
+
+        // Reset things
+        nextMask = 0;
+        maskBitsSet = 0;
+        bufferLen = 0;
+    }
+
+    /**
+     * Does the compression
+     */
+    public void compress(InputStream src) throws IOException {
+        int dataI = -1;
+        while (true) {
+            if (dataI > -1) {
+                // copy the last read byte into the dictionary.
+                // the example data compressor used self references, so we don't wait for filling the dictionary
+                // until we know if it's a un-/compressed token.
+                dict[(posInp++) & DICT_MASK] = (byte)dataI;
+            }
+            // This is an unsigned byte read from the stream
+            // It needs to be unsigned, so that bit stuff works
+            dataI = src.read();
+
+            // If we've run out of data, output anything that's pending then finish
+            if (dataI == -1) {
+                if (rawCodeLen > 0) {
+                    outputCompressed();
+                    if (maskBitsSet > 0) {
+                        output8Codes();
+                    }
+                }
+                break;
+            }
+
+            // This is a byte as looked up in the dictionary
+            // It needs to be signed, as it'll get passed on to the output stream
+            byte dataB = (byte) dataI;
+
+            // Try adding this new byte onto rawCode, and see if all of that is still found
+            // in the buffer dictionary or not
+            rawCode[rawCodeLen++] = dataB;
+            int rawAt = findRawCodeInBuffer();
+
+            if (rawAt > -1) {
+                // If we found it and are now at 18 bytes, we need to output our pending code block
+                if (rawCodeLen == 18) {
+                    outputCompressed();
+                }
+
+                // If we did find all of rawCode with our new byte added on,
+                // we can wait to see what happens with the next byte
+                continue;
+            }
+
+            // If we get here, then the rawCode + this byte weren't found in the dictionary
+
+            // If there was something in rawCode before, then that was
+            // found in the dictionary, so output that compressed
+            rawCodeLen--;
+            if (rawCodeLen > 0) {
+                // Output the old rawCode
+                outputCompressed();
+
+                // Can this byte start a new rawCode, or does it need outputting itself?
+                rawCode[0] = dataB;
+                rawCodeLen = 1;
+                if (findRawCodeInBuffer() > -1) {
+                    // Fits in, wait for next byte
+                    continue;
+                }
+                // Doesn't fit, output
+                outputUncompressed(dataB);
+            } else {
+                // Nothing in rawCode before, so this byte isn't in the buffer dictionary
+                // Output it un-compressed
+                outputUncompressed(dataB);
+            }
+        }
+    }
 }

Modified: 
poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java
URL: 
http://svn.apache.org/viewvc/poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java?rev=1872223&r1=1872222&r2=1872223&view=diff
==============================================================================
--- poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java 
(original)
+++ poi/trunk/src/scratchpad/testcases/org/apache/poi/hdgf/TestHDGFLZW.java Wed 
Jan  1 22:44:42 2020
@@ -17,139 +17,112 @@
 
 package org.apache.poi.hdgf;
 
+import static org.junit.Assert.assertArrayEquals;
 import static org.junit.Assert.assertEquals;
 
 import java.io.ByteArrayInputStream;
+import java.util.Arrays;
 
-import org.junit.Ignore;
 import org.junit.Test;
 
 public final class TestHDGFLZW {
-       public static final byte[] testTrailerComp = {
-               123,      // *mask bit*
-               -60, 2,
-               -21, -16, // 3 @ 4093
-               1, 0, 0, -72,
-               -13, -16, // 3 @ 5
-               78,       // *mask bit* 2,3,4,7
-               -32, -5,  // 14 @ 4082
-               1, 0, 3,
-               -21, -16, // 3 @ 4093
-               10, 5,    // 8 @ 28
-               4,
-               -21, -16, // 3 @ 4093
-               21,       // *mask bit* 1,3,5
-               9,
-               -21, -16, // 3 @ 4093
-               103,
-               -21, -16, // 3 @ 4093
-               34,
-               -36, -1,  // 18 @ 4078
-               52, 15,   // 18 @ 70
-               70, 15,   // 18 @ 88
-               120,      // *mask bit*
-               88, 15,   // 18 @ 106
-               -7, -2,   // 17 @ 11
-               -28, -9,  // 10 @ 4086
-               -123, 21, 0, 44,
-               -122, 1,  // 4 @ 152
-               -4,       // *mask bit*
-               104, 15,  // 18 @ 122
-               -24, -13, 40, -98, 32,
-               78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
-               -85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
-               -34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
-               -12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23,
-               -21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17,
-               -43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79,
-               -9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13,
-               -16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1,
-               17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29,
-               -21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86,
-               0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3,
-               125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41,
-               -21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85,
-               17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64,
-               85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21,
-               -16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17,
-               1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68,
-               85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69,
-               85, 1, 102, -119, 72, 37, 0, 97, 33 };
-       public static final byte[] testTrailerDecomp = {
-               -60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0,
-               0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               -123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67,
-               -2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66,
-               123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30,
-               64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0,
-               0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67,
-               1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1,
-               -44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8,
-               0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0,
-               0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0,
-               -106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0,
-               50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0,
-               36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100,
-               -4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4,
-               79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79,
-               1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85,
-               1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
-               0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119,
-               0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127,
-               0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120,
-               0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119,
-               0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
-       };
-
-       @Test
-       public void testFromToInt() {
-               byte b255 = -1;
-               assertEquals(255, HDGFLZW.fromByte(b255));
-               assertEquals(-1, HDGFLZW.fromInt( HDGFLZW.fromByte(b255) ));
-               assertEquals(-1, HDGFLZW.fromInt( 255 ));
-
-               byte b11 = 11;
-               assertEquals(11, HDGFLZW.fromByte(b11));
-               assertEquals(11, HDGFLZW.fromInt( HDGFLZW.fromByte(b11) ));
-               assertEquals(11, HDGFLZW.fromInt( 11 ));
-
-               byte b0 = 0;
-               assertEquals(0, HDGFLZW.fromByte(b0));
-               assertEquals(0, HDGFLZW.fromInt( HDGFLZW.fromByte(b0) ));
-               assertEquals(0, HDGFLZW.fromInt( 0 ));
-
-               byte b127 = 127;
-               assertEquals(127, HDGFLZW.fromByte(b127));
-               assertEquals(127, HDGFLZW.fromInt( HDGFLZW.fromByte(b127) ));
-               assertEquals(127, HDGFLZW.fromInt( 127 ));
-
-               byte b128 = -128;
-               assertEquals(128, HDGFLZW.fromByte(b128));
-               assertEquals(-128, HDGFLZW.fromInt( HDGFLZW.fromByte(b128) ));
-               assertEquals(-128, HDGFLZW.fromInt( 128 ));
-       }
-
-       @Test
-       public void testCounts() throws Exception {
-               assertEquals(339, testTrailerComp.length);
-               assertEquals(632, testTrailerDecomp.length);
-
-               // decompress it using our engine
-               HDGFLZW lzw = new HDGFLZW();
-               byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
+    public static final byte[] testTrailerComp = {
+            123,      // *mask bit* 1,2,4-7
+            -60, 2,
+            -21, -16, // 3 @ 4093
+            1, 0, 0, -72,
+            -13, -16, // 3 @ 5
+            78,       // *mask bit* 2,3,4,7
+            -32, -5,  // 14 @ 4082
+            1, 0, 3,
+            -21, -16, // 3 @ 4093
+            10, 5,    // 8 @ 28
+            4,
+            -21, -16, // 3 @ 4093
+            21,       // *mask bit* 1,3,5
+            9,
+            -21, -16, // 3 @ 4093
+            103,
+            -21, -16, // 3 @ 4093
+            34,
+            -36, -1,  // 18 @ 4078
+            52, 15,   // 18 @ 70
+            70, 15,   // 18 @ 88
+            120,      // *mask bit*
+            88, 15,   // 18 @ 106
+            -7, -2,   // 17 @ 11
+            -28, -9,  // 10 @ 4086
+            -123, 21, 0, 44,
+            -122, 1,  // 4 @ 152
+            -4,       // *mask bit*
+            104, 15,  // 18 @ 122
+            -24, -13, 40, -98, 32,
+            78, 102, -67, -1, -2, -30, 64, 40, -67, -113, -73, 116, -98,
+            -85, 2, 66, 123, 9, 109, -85, 2, -89, 14, -56, -69, -83, -79,
+            -34, -3, 120, 110, 75, -9, -10, 20, -6, -25, -12, 22, -21, -16,
+            -12, -81, 67, 1, -128, -70, -21, -16, 84, -21, -16, 70, 0, 23,
+            -21, -16, 76, 47, -40, 79, 1, -44, -21, -16, 32, 3, 18, 12, 17,
+            -43, -68, 17, 16, -8, 21, 22, -1, -21, -16, -84, -1, -35, 79,
+            -9, -10, 96, 0, 46, -21, -16, 44, -39, -41, 79, 1, 119, -13,
+            -16, -106, -13, -16, 84, 0, 125, 26, -21, -16, 68, -38, 79, 1,
+            17, 10, 0, -97, 50, 10, 0, 0, -42, -108, 15, 118, 31, 0, -3, 29,
+            -21, -16, -100, -25, 79, 1, -18, 97, -36, 76, 16, -21, -16, 86,
+            0, 36, -5, 1, -5, 79, 63, 1, -124, 98, 0, 0, 28, 3, 20, -34, -3,
+            125, 33, -21, -16, 100, -4, 79, 1, -92, -91, 16, -22, 24, 19, 41,
+            -21, -16, -44, -59, 16, 108, 100, 0, -21, 0, 71, -105, 18, 39, 85,
+            17, -3, 79, 1, 95, -108, 113, 0, 0, 104, 3, 18, 49, 49, 17, -1, 64,
+            85, 1, 0, 114, 0, 0, -93, -36, -21, -16, 100, 31, 0, 0, -40, -21,
+            -16, -92, 66, 127, 85, 1, 98, 119, 0, 0, -48, 79, 18, -3, 50, -17,
+            1, 67, 85, 1, 81, -127, 0, -41, 0, 14, 6, 4, 17, 63, -63, 17, 68,
+            85, -65, 1, 30, -120, 0, 0, 42, 79, 18, 68, 126, -21, -16, -76, 69,
+            85, 1, 102, -119, 72, 37, 0, 97, 33};
+    public static final byte[] testTrailerDecomp = {
+            -60, 2, 0, 0, 0, 1, 0, 0, -72, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 1, 0, 3, 0, 0, 0, 3, 0, 0, 0, 3, 0, 0, 0, 4, 0, 0,
+            0, 9, 0, 0, 0, 103, 0, 0, 0, 34, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 1, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            -123, 21, 0, 44, -123, 21, 0, 44, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 40, -98, 32, 78, 102, -67,
+            -2, -30, 64, 40, -67, -113, -73, 116, -67, -2, -30, 64, 40, 66,
+            123, 9, 109, -67, -2, -30, 64, 40, -98, 32, 78, 102, -67, -2, -30,
+            64, 40, -67, -113, -73, 116, -67, -2, -30, 64, -56, -83, -79, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 120, 110, 75, 1, 0, 0, 0,
+            0, 0, 0, 0, 0, 20, 0, 0, 0, 0, 0, 0, 0, 22, 0, 0, 0, -12, -81, 67,
+            1, -128, 0, 0, 0, 84, 0, 0, 0, 70, 0, 23, 0, 0, 0, 76, -40, 79, 1,
+            -44, 0, 0, 0, 32, 0, 0, 0, 84, 0, 23, 0, 0, 0, -68, -40, 79, 1, -8,
+            0, 0, 0, 32, 0, 0, 0, 84, 0, -1, 0, 0, 0, -84, -1, 79, 1, 0, 0, 0,
+            0, 0, 0, 0, 0, 96, 0, 46, 0, 0, 0, 44, -39, 79, 1, 119, 1, 0, 0,
+            -106, 1, 0, 0, 84, 0, 26, 0, 0, 0, 68, -38, 79, 1, 17, 3, 0, 0,
+            50, 10, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            29, 0, 0, 0, -100, -25, 79, 1, -18, 97, 0, 0, -106, 0, 0, 0, 86, 0,
+            36, 0, 0, 0, -12, -5, 79, 1, -124, 98, 0, 0, 28, 0, 0, 0, 84, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0, 33, 0, 0, 0, 100,
+            -4, 79, 1, -92, 98, 0, 0, 32, 0, 0, 0, 84, 0, 41, 0, 0, 0, -44, -4,
+            79, 1, 108, 100, 0, 0, 71, 0, 0, 0, 86, 0, 39, 0, 0, 0, 68, -3, 79,
+            1, -108, 113, 0, 0, 104, 0, 0, 0, 84, 0, 49, 0, 0, 0, -84, 64, 85,
+            1, 0, 114, 0, 0, -93, 0, 0, 0, -42, 0, 0, 0, 0, 0, 0, 0, 0, 0, 0,
+            0, 0, 0, 0, 0, 0, 0, 0, 0, -40, 0, 0, 0, -92, 66, 85, 1, 98, 119,
+            0, 0, -48, 1, 0, 0, 84, 0, 50, 0, 0, 0, 20, 67, 85, 1, 81, -127,
+            0, 0, 14, 6, 0, 0, 84, 0, 63, 0, 0, 0, 100, 68, 85, 1, 30, -120,
+            0, 0, 42, 1, 0, 0, 84, 0, 68, 0, 0, 0, -76, 69, 85, 1, 102, -119,
+            0, 0, 42, 1, 0, 0, 84, 0, 0, 0, 0, 0
+    };
+
+    @Test
+    public void testCounts() throws Exception {
+        assertEquals(339, testTrailerComp.length);
+        assertEquals(632, testTrailerDecomp.length);
+
+        // decompress it using our engine
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
 
-               // Check it's of the right size
-               assertEquals(632, dec.length);
+        // Check it's of the right size
+        assertEquals(632, dec.length);
 
 /*
                // Encode it again using our engine
@@ -158,121 +131,89 @@ public final class TestHDGFLZW {
                // Check it's of the right size
                assertEquals(339, comp.length);
 */
-       }
+    }
 
-       @Test
-       public void testDecompress() throws Exception {
-               assertEquals(339, testTrailerComp.length);
-               assertEquals(632, testTrailerDecomp.length);
-
-               // decompress it using our engine
-               HDGFLZW lzw = new HDGFLZW();
-               byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
-
-               // Now check it's the right data
-               assertEquals(632, dec.length);
-               for(int i=0; i<dec.length; i++) {
-                       if(dec[i] != testTrailerDecomp[i])
-                               System.err.println(i + "\t" + dec[i] + "\t" + testTrailerDecomp[i]);
-               }
-       }
-
-       /**
-        * Test that we can round-trip a little bit.
-        * Uses a part short enough that we agree with visio
-        *  on the best way to compress it
-        */
-       @Test
-       public void testCompressMini() throws Exception {
-          // first 11 bytes compressed = 12 bytes uncompressed
-          byte[] sourceComp = new byte[11];
-          byte[] sourceDecomp = new byte[12];
-          System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length);
-      System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length);
-
-               // Compress it using our engine
-               HDGFLZW lzw = new HDGFLZW();
-               byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
+    @Test
+    public void testDecompress() throws Exception {
+        assertEquals(339, testTrailerComp.length);
+        assertEquals(632, testTrailerDecomp.length);
+
+        // decompress it using our engine
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] dec = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
+
+        // Now check it's the right data
+        assertArrayEquals(testTrailerDecomp, dec);
+    }
+
+    /**
+     * Test that we can round-trip a little bit.
+     * Uses a part short enough that we agree with visio
+     * on the best way to compress it
+     */
+    @Test
+    public void testCompressMini() throws Exception {
+        // first 11 bytes compressed = 12 bytes uncompressed
+        byte[] sourceDecomp = Arrays.copyOf(testTrailerDecomp, 12);
+
+        // Compress it using our engine
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
 
-               // Now decompress it again
-               byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
+        // Now decompress it again
+        byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
 
-               // First up, check the round tripping
+        // First up, check the round tripping
                assertEquals(12, decomp.length);
-      for(int i=0; i<decomp.length; i++) {
-         assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]);
-      }
-
-               // Now check the compressed intermediate version
-      assertEquals(11, comp.length);
-      for(int i=0; i<comp.length; i++) {
-         assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]);
-      }
-       }
-
-       /**
-        * Tests that we can do several mask pages
-        */
-       @Test
-   public void testCompressMidi() throws Exception {
-      // First 12 -> 11
-      // Next 32 -> 13
-      byte[] sourceComp = new byte[24];
-      byte[] sourceDecomp = new byte[44];
-      System.arraycopy(testTrailerComp, 0, sourceComp, 0, sourceComp.length);
-      System.arraycopy(testTrailerDecomp, 0, sourceDecomp, 0, sourceDecomp.length);
-
-      // Compress it using our engine
-      HDGFLZW lzw = new HDGFLZW();
-      byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
-
-      // We should be 3 characters bigger, as
-      //  we split one compressed bit into two
-      assertEquals(27, comp.length);
-
-      // Now decompress it again
-      byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
-
-      // We can only check the round-tripping, as for now
-      //  visio cheats on re-using a block
-      assertEquals(44, decomp.length);
-      for(int i=0; i<decomp.length; i++) {
-         assertEquals("Wrong at " + i, decomp[i], sourceDecomp[i]);
-      }
-   }
-
-   /**
-    * Gets 160 bytes through then starts going wrong...
-    * TODO Fix this
-    */
-   @Test
-   @Ignore
-   public void testCompressFull() throws Exception {
-      assertEquals(339, testTrailerComp.length);
-      assertEquals(632, testTrailerDecomp.length);
-
-      // Compress it using our engine
-      HDGFLZW lzw = new HDGFLZW();
-      byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
-
-      // Now decompress it again
-      byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
-
-//      for(int i=0; i<comp.length; i++) {
-//         System.err.println(i + "\t" + comp[i] + "\t" + testTrailerComp[i]);
-//      }
-
-      // First up, check the round tripping
-//    assertEquals(632, decomp.length);
-      for(int i=0; i<decomp.length; i++) {
-         assertEquals("Wrong at " + i, decomp[i], testTrailerDecomp[i]);
-      }
-
-
-      // Now check the compressed intermediate version
-      assertEquals(339, comp.length);
-      for(int i=0; i<comp.length; i++) {
-         assertEquals("Wrong at " + i, comp[i], testTrailerComp[i]);
-      }
-   }
+               assertArrayEquals(Arrays.copyOfRange(testTrailerDecomp, 0, decomp.length), decomp);
+
+        // Now check the compressed intermediate version
+        assertEquals(11, comp.length);
+               assertArrayEquals(Arrays.copyOfRange(testTrailerComp, 0, comp.length), comp);
+    }
+
+    /**
+     * Tests that we can do several mask pages
+     */
+    @Test
+    public void testCompressMidi() throws Exception {
+        // First 12 -> 11
+        // Next 32 -> 13
+        byte[] sourceDecomp = Arrays.copyOf(testTrailerDecomp, 44);
+
+        // Compress it using our engine
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] comp = lzw.compress(new ByteArrayInputStream(sourceDecomp));
+
+        assertEquals(24, comp.length);
+
+        // Now decompress it again
+        byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
+
+        // We can only check the round-tripping, as for now
+        //  visio cheats on re-using a block
+               assertArrayEquals(sourceDecomp, decomp);
+    }
+
+    @Test
+    public void testCompressFull() throws Exception {
+        assertEquals(339, testTrailerComp.length);
+        assertEquals(632, testTrailerDecomp.length);
+
+        HDGFLZW lzw = new HDGFLZW();
+        byte[] decomp2 = lzw.decompress(new ByteArrayInputStream(testTrailerComp));
+        assertArrayEquals(testTrailerDecomp, decomp2);
+
+
+        // Compress it using our engine
+        byte[] comp = lzw.compress(new ByteArrayInputStream(testTrailerDecomp));
+
+        // the compressed binary differs, as the run length searching finds different results
+        // but the decompressed data is the same
+
+        // Now decompress it again
+        byte[] decomp = lzw.decompress(new ByteArrayInputStream(comp));
+
+        assertArrayEquals(testTrailerDecomp, decomp);
+    }
 }



---------------------------------------------------------------------
To unsubscribe, e-mail: commits-unsubscr...@poi.apache.org
For additional commands, e-mail: commits-h...@poi.apache.org

Reply via email to