Author: tilman
Date: Tue Sep  9 08:34:00 2025
New Revision: 1928315

Log:
PDFBOX-6065: handle KwKwK special case, by Daniel Persson with ChatGPT

Modified:
   pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java

Modified: pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java
==============================================================================
--- pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java   Tue Sep  9 08:33:57 2025        (r1928314)
+++ pdfbox/trunk/pdfbox/src/main/java/org/apache/pdfbox/filter/LZWFilter.java   Tue Sep  9 08:34:00 2025        (r1928315)
@@ -54,9 +54,6 @@ public class LZWFilter extends Filter
      * The LZW end of data code.
      */
     public static final long EOD = 257;
-    
-    //BEWARE: codeTable must be local to each method, because there is only
-    // one instance of each filter
 
     /**
      * {@inheritDoc}
@@ -73,11 +70,12 @@ public class LZWFilter extends Filter
 
     private static void doLZWDecode(InputStream encoded, OutputStream decoded, boolean earlyChange) throws IOException
     {
-        List<byte[]> codeTable = new ArrayList<>();
+        List<byte[]> codeTable = createCodeTable();      // includes CLEAR/EOD handling as needed
         int chunk = 9;
         final MemoryCacheImageInputStream in = new MemoryCacheImageInputStream(encoded);
+
+        byte[] prev = null; // no previous string yet
         long nextCommand;
-        long prevCommand = -1;
 
         try
         {
@@ -87,60 +85,50 @@ public class LZWFilter extends Filter
                 {
                     chunk = 9;
                     codeTable = createCodeTable();
-                    prevCommand = -1;
+                    prev = null;
+                    continue;
                 }
-                else
+
+                byte[] curr;
+
+                if (nextCommand < codeTable.size())
                 {
-                    if (nextCommand < codeTable.size())
-                    {
-                        byte[] data = codeTable.get((int) nextCommand);
-                        byte firstByte = data[0];
-                        decoded.write(data);
-                        if (prevCommand != -1)
-                        {
-                            checkIndexBounds(codeTable, prevCommand, in);
-                            data = codeTable.get((int) prevCommand);
-                            byte[] newData = Arrays.copyOf(data, data.length + 1);
-                            newData[data.length] = firstByte;
-                            codeTable.add(newData);
-                        }
-                    }
-                    else
+                    // Normal case: code exists
+                    curr = codeTable.get((int) nextCommand);
+                    decoded.write(curr);
+
+                    if (prev != null)
                     {
-                        checkIndexBounds(codeTable, prevCommand, in);
-                        byte[] data = codeTable.get((int) prevCommand);
-                        byte[] newData = Arrays.copyOf(data, data.length + 1);
-                        newData[data.length] = data[0];
-                        decoded.write(newData);
-                        codeTable.add(newData);
+                        // Add prev + first(curr)
+                        byte[] entry = Arrays.copyOf(prev, prev.length + 1);
+                        entry[prev.length] = curr[0];
+                        codeTable.add(entry);
                     }
-                    
-                    chunk = calculateChunk(codeTable.size(), earlyChange);
-                    prevCommand = nextCommand;
                 }
+                else if (nextCommand == codeTable.size() && prev != null)
+                {
+                    // KwKwK case: code equals next available index
+                    curr = Arrays.copyOf(prev, prev.length + 1);
+                    curr[prev.length] = prev[0];
+                    decoded.write(curr);
+                    codeTable.add(curr);
+                }
+                else
+                {
+                    // Corrupt stream (code out of range, or KwKwK without prev)
+                    throw new EOFException("Invalid LZW code: " + nextCommand);
+                }
+
+                prev = curr; // move forward
+                chunk = calculateChunk(codeTable.size(), earlyChange);
             }
         }
         catch (EOFException ex)
         {
             LOG.warn("Premature EOF in LZW stream, EOD code missing", ex);
         }
-        decoded.flush();
-    }
 
-    private static void checkIndexBounds(List<byte[]> codeTable, long index, MemoryCacheImageInputStream in)
-            throws IOException
-    {
-        if (index < 0)
-        {
-            throw new IOException("negative array index: " + index + " near offset "
-                    + in.getStreamPosition());
-        }
-        if (index >= codeTable.size())
-        {
-            throw new IOException("array index overflow: " + index +
-                    " >= " + codeTable.size() + " near offset "
-                    + in.getStreamPosition());
-        }
+        decoded.flush();
     }
 
     /**

Reply via email to