[sis] 02/02: Allocate memory for new entries by blocks of 4 bytes instead of doing an unconditional allocation for each new entry. It reduces the amount of calls to `System.arraycopy(…)` by a factor close to 4 and also reduces memory consumption. The performance improvement was not as expected; this new version is even a bit slower than the one in previous commit. It is a little bit surprising because a code block with branching, bounds checks and copies should be executed less often (the `if (allocate [...]

desruisseaux Fri, 08 Jul 2022 06:07:52 -0700

This is an automated email from the ASF dual-hosted git repository.

desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git


commit 35b5576064b43b90df8959e4e71adc1491c6b734
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Fri Jul 8 13:04:34 2022 +0200

    Allocate memory for new entries by blocks of 4 bytes instead of doing an 
unconditional allocation for each new entry.
    It reduces the amount of calls to `System.arraycopy(…)` by a factor close 
to 4 and also reduces memory consumption.
    The performance improvement was not as expected; this new version is even a 
bit slower than the one in previous commit.
    It is a little bit surprising because a code block with branching, bounds 
checks and copies should be executed less often
    (the `if (allocate) {…}` block), and the cost in the rest of the code is 
mostly a few bit twiddling, which should be cheap.
    Maybe it is related to the way HotSpot tries to optimize, in which case the 
benchmark may change in any future Java version.
    We keep this new LZW version for now because of its lower memory usage 
(almost a factor 4).
---
 .../apache/sis/internal/storage/inflater/LZW.java  | 195 ++++++++++++++-------
 1 file changed, 133 insertions(+), 62 deletions(-)

diff --git 
a/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/LZW.java
 
b/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/LZW.java
index a1b37f7e01..1431d28afc 100644
--- 
a/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/LZW.java
+++ 
b/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/LZW.java
@@ -96,6 +96,40 @@ final class LZW extends CompressionChannel {
      */
     private static final int LENGTH_MASK = (1 << LOWEST_OFFSET_BIT) - 1;
 
+    /**
+     * Number of bits in an offset that are always 0 and consequently do not 
need to be stored.
+     * An intentional consequence of this restriction is that size of blocks 
allocated in the
+     * {@link #stringsFromCode} array must be multiples of {@literal (1 << 
STRING_ALIGNMENT)}.
+     * It makes possible to use the extra size for growing a string up to that 
amount of bytes
+     * without copying it.
+     *
+     * <div class="note"><b>Note:</b>
+     * doing allocations only by blocks of 2² = 4 bytes may seem a waste of 
memory, but actually
+     * it reduces memory usage a lot (almost a factor 4) because of the copies 
avoided.
+     * We tried with alignment values 1, 2, 3 and found that 2 seems 
optimal.</div>
+     */
+    private static final int STRING_ALIGNMENT = 2;
+
+    /**
+     * Mask for a bit in an {@link #entriesForCodes} element for telling 
whether the extra space allocated in the
+     * {@link #stringsFromCode} array has already been used by another entry. 
If yes (1), then that space can not
+     * be used by new entry. Instead, the new entry will need to allocate a 
new space.
+     *
+     * <p>Note: {@link #newEntryNeedsAllocation(int)} implementation assumes 
that this bit is the sign bit.</p>
+     */
+    private static final int PREALLOCATED_SPACE_IS_USED_MASK = 1 << 
(Integer.SIZE - 1);
+
+    /**
+     * The mask to apply on an {@link #entriesForCodes} element for getting 
the compressed offset (before shifting).
+     */
+    private static final int OFFSET_MASK = (PREALLOCATED_SPACE_IS_USED_MASK - 
1) & ~LENGTH_MASK;
+
+    /**
+     * The shift to apply on a compressed offset (after application of {@link 
#OFFSET_MASK})
+     * for getting the uncompressed offset.
+     */
+    private static final int OFFSET_SHIFT = LOWEST_OFFSET_BIT - 
STRING_ALIGNMENT;
+
     /**
      * Extracts the number of bytes of an entry stored in the {@link 
#stringsFromCode} array.
      *
@@ -113,23 +147,49 @@ final class LZW extends CompressionChannel {
      * @return index of the first byte to read in {@link #stringsFromCode} 
array.
      */
     private static int offset(final int element) {
-        return element >>> LOWEST_OFFSET_BIT;
+        return (element & OFFSET_MASK) >>> OFFSET_SHIFT;
     }
 
     /**
      * Encodes an offset together with its length.
      */
     private static int offsetAndLength(final int offset, final int length) {
-        final int element = (offset << LOWEST_OFFSET_BIT) | length;
+        final int element = (offset << OFFSET_SHIFT) | length;
         assert offset(element) == offset : offset;
         assert length(element) == length : length;
         return element;
     }
 
     /**
-     * Maximal value + 1 that the offset can take. The offset takes all the 
bits after the length.
+     * Maximal value + 1 that the offset can take. The compressed offset takes 
all the bits after the length,
+     * minus one bit that we keep for the {@link 
#PREALLOCATED_SPACE_IS_USED_MASK} flag. Note that compressed
+     * offsets are multiplied by {@literal 1 << STRING_ALIGNMENT} for getting 
the actual offset.
+     */
+    private static final int OFFSET_LIMIT = 1 << (Integer.SIZE - 1 - 
OFFSET_SHIFT);
+
+    /**
+     * A mask used for detecting when a new allocation is required.
+     * If {@code (length & LENGTH_MASK_FOR_ALLOCATE) == 0} and assuming that
+     * length is always incremented by 1, then a new allocation is necessary.
      */
-    private static final int OFFSET_LIMIT = 1 << (Integer.SIZE - 
LOWEST_OFFSET_BIT);
+    private static final int LENGTH_MASK_FOR_ALLOCATE = (1 << 
STRING_ALIGNMENT) - 1;
+
+    /**
+     * Returns {@code true} if all the space allocated for the given entry is 
already used.
+     * This is true if at least one of the following conditions is true:
+     *
+     * <ul>
+     *   <li>The {@link #PREALLOCATED_SPACE_IS_USED_MASK} is set, in which 
case value is negative.</li>
+     *   <li>All the extra-space allowed by {@link #STRING_ALIGNMENT} is used, 
in which case the lowest
+     *       bits of the length are all zero.</li>
+     * </ul>
+     *
+     * @param  element  an element of the {@link #entriesForCodes} array.
+     * @return whether all the space for that entry is already used.
+     */
+    private static boolean newEntryNeedsAllocation(final int element) {
+        return (element & (PREALLOCATED_SPACE_IS_USED_MASK | 
LENGTH_MASK_FOR_ALLOCATE)) <= 0;
+    }
 
     /**
      * Pointers to byte sequences for a code in the {@link #entriesForCodes} 
array.
@@ -139,16 +199,16 @@ final class LZW extends CompressionChannel {
     private final int[] entriesForCodes;
 
     /**
-     * Offset and length of the sequence of bytes associated to the code in 
previous iteration.
-     * This is a value from {@link #entriesForCodes} array.
+     * Last code found in previous iteration. This is a valid index in the 
{@link #entriesForCodes} array.
+     * A {@link #EOI_CODE} value means that the decompression is finished.
      */
-    private int previousEntry;
+    private int previousCode;
 
     /**
      * If some bytes could not be written in previous {@code read(…)} 
execution because the target buffer was full,
-     * offset and length of those bytes. Otherwise 0. This is a value from 
{@link #entriesForCodes} array.
+     * offset and length of those bytes. Otherwise 0.
      */
-    private int pendingEntry;
+    private int pendingOffset, pendingLength;
 
     /**
      * Index of the next entry available in {@link #entriesForCodes}.
@@ -169,11 +229,6 @@ final class LZW extends CompressionChannel {
      */
     private byte[] stringsFromCode;
 
-    /**
-     * Whether the {@link #EOI_CODE} has been found.
-     */
-    private boolean done;
-
     /**
      * Creates a new channel which will decompress data from the given input.
      * The {@link #setInputRegion(long, long)} method must be invoked after 
construction
@@ -184,11 +239,10 @@ final class LZW extends CompressionChannel {
     public LZW(final ChannelDataInput input) {
         super(input);
         indexOfFreeEntry = FIRST_ADAPTATIVE_CODE;
-        entriesForCodes  = new int [(1 << MAX_CODE_SIZE) - OFFSET_TO_MAXIMUM];
-        stringsFromCode  = new byte[32 * 1024];         // Dynamically 
expanded if needed.
+        entriesForCodes  = new int[(1 << MAX_CODE_SIZE) - OFFSET_TO_MAXIMUM];
+        stringsFromCode  = new byte[3 << (MAX_CODE_SIZE + STRING_ALIGNMENT)];  
 // Dynamically expanded if needed.
         for (int i=0; i < (1 << Byte.SIZE); i++) {
-            entriesForCodes[i] = (i << LOWEST_OFFSET_BIT) | 1;
-            stringsFromCode[i] = (byte) i;
+            stringsFromCode[i << STRING_ALIGNMENT] = (byte) i;
         }
     }
 
@@ -203,18 +257,21 @@ final class LZW extends CompressionChannel {
     public void setInputRegion(final long start, final long byteCount) throws 
IOException {
         super.setInputRegion(start, byteCount);
         clearTable();
-        previousEntry = 0;
-        pendingEntry  = 0;
-        done          = false;
+        previousCode  = 0;
+        pendingOffset = 0;
+        pendingLength = 0;
     }
 
     /**
      * Clears the {@link #entriesForCodes} table.
      */
     private void clearTable() {
+        for (int i=0; i<CLEAR_CODE; i++) {
+            entriesForCodes[i] = (i << LOWEST_OFFSET_BIT) | 1;
+        }
         Arrays.fill(entriesForCodes, FIRST_ADAPTATIVE_CODE, indexOfFreeEntry, 
0);
         indexOfFreeEntry  = FIRST_ADAPTATIVE_CODE;
-        indexOfFreeString = 1 << Byte.SIZE;
+        indexOfFreeString = (1 << Byte.SIZE) << STRING_ALIGNMENT;
         codeSize          = MIN_CODE_SIZE;
     }
 
@@ -228,40 +285,34 @@ final class LZW extends CompressionChannel {
     @Override
     public int read(final ByteBuffer target) throws IOException {
         final int start = target.position();
+        int previousCode = this.previousCode;
         /*
          * If a previous invocation of this method was unable to write some 
data
          * because the target buffer was full, write these remaining data 
first.
          */
-        if (pendingEntry != 0) {
-            final int remaining    = target.remaining();
-            final int stringOffset = offset(pendingEntry);
-            final int stringLength = length(pendingEntry);
-            target.put(stringsFromCode, stringOffset, Math.min(stringLength, 
remaining));
-            if (stringLength <= remaining) {
-                pendingEntry = 0;
-                if (done) {
-                    return stringLength;
-                }
-            } else {
-                pendingEntry = offsetAndLength(stringOffset + remaining, 
stringLength - remaining);
-                return remaining;           // Can not write more than what we 
just wrote.
+        if (pendingLength != 0) {
+            final int n = Math.min(pendingLength, target.remaining());
+            target.put(stringsFromCode, pendingOffset, n);
+            pendingOffset += n;
+            pendingLength -= n;
+            if (pendingLength != 0 || previousCode == EOI_CODE) {
+                return n;
             }
-        } else if (done |= finished()) {
+        } else if (previousCode == EOI_CODE || finished()) {
             return -1;
         }
         /*
          * Below is adapted from TIFF version 6 specification, section 13, LZW 
Decoding.
          * Comments such as "InitializeTable()" refer to methods in TIFF 
specification.
-         * The body is a little bit more complex because codes in [0 … 255] 
range are
-         * handled as a special case instead of stored in the 
`entriesForCodes` table.
          */
-        int stringOffset = offset(previousEntry);
-        int stringLength = length(previousEntry);
+        int entryForCode = entriesForCodes[previousCode];
+        int stringOffset = offset(entryForCode);
+        int stringLength = length(entryForCode);
         int maximumIndex = (1 << codeSize) - OFFSET_TO_MAXIMUM;
         int code;
         while ((code = (int) input.readBits(codeSize)) != EOI_CODE) {       // 
GetNextCode()
             if (code == CLEAR_CODE) {
-                clearTable();       // InitializeTable()
+                clearTable();                                               // 
InitializeTable()
                 maximumIndex = (1 << MIN_CODE_SIZE) - OFFSET_TO_MAXIMUM;
                 /*
                  * We should not have consecutive clear codes, but it is easy 
to check for safety.
@@ -281,14 +332,21 @@ final class LZW extends CompressionChannel {
                  * but for a single byte.
                  */
                 stringLength = 1;
-                stringOffset = code;                    // OldCode = Code
+                stringOffset = code << STRING_ALIGNMENT;
+                entryForCode = entriesForCodes[code];
+                assert offsetAndLength(stringOffset, 1) == entryForCode : code;
+                assert Byte.toUnsignedInt(stringsFromCode[stringOffset]) == 
code : code;
                 if (target.hasRemaining()) {
-                    target.put((byte) code);            // 
WriteString(StringFromCode(Code))
+                    target.put((byte) code);                // 
WriteString(StringFromCode(Code))
                 } else {
-                    pendingEntry = offsetAndLength(code, 1);
+                    pendingOffset = stringOffset;
+                    pendingLength = stringLength;
                     break;
                 }
             } else {
+                assert entryForCode == entriesForCodes[previousCode] : 
previousCode;
+                assert stringOffset == offset(entryForCode) : stringOffset;
+                assert stringLength == length(entryForCode) : stringLength;
                 /*
                  * Cases for all codes after the first code following 
`CLEAR_CODE`.
                  * Those cases will add a new entry in the `stringsFromCode` 
table.
@@ -300,18 +358,28 @@ final class LZW extends CompressionChannel {
                  * Those two branches are identical except for the last 
argument (Code or OldCode).
                  * We do the common part now before `stringOffset` and 
`stringLength` are modified.
                  * The last byte will be added later, after we determined its 
value.
+                 *
+                 * Conceptually, creating a new entry requires copying the old 
entry before to append a byte.
+                 * However we try to avoid some copies by pre-allocating extra 
spaces with each new entry.
+                 * The `PREALLOCATED_SPACE_IS_USED_MASK` bit tells if we can 
append a byte in-place after
+                 * `OldCode` (without copy).
                  */
-                final int newOffset   = indexOfFreeString;
-                final int newLength   = stringLength + 1;
-                final int lastNewByte = newOffset + stringLength;
-                if (lastNewByte >= stringsFromCode.length) {
-                    final int capacity = Math.min(indexOfFreeString * 2, 
OFFSET_LIMIT);
-                    if (lastNewByte >= capacity) {
-                        throw new IOException("Dictionary overflow");
+                final boolean allocate    = 
newEntryNeedsAllocation(entryForCode);
+                final int     newOffset   = allocate ? indexOfFreeString : 
stringOffset;
+                final int     lastNewByte = newOffset + stringLength;
+                if (allocate) {
+                    indexOfFreeString = lastNewByte + (~lastNewByte & 
LENGTH_MASK_FOR_ALLOCATE) + 1;
+                    assert (indexOfFreeString & LENGTH_MASK_FOR_ALLOCATE) == 0 
: stringLength;
+                    if (indexOfFreeString > stringsFromCode.length) {
+                        final int capacity = Math.min(indexOfFreeString * 2, 
OFFSET_LIMIT);
+                        if (indexOfFreeString >= capacity) {
+                            throw new IOException("Dictionary overflow");
+                        }
+                        stringsFromCode = Arrays.copyOf(stringsFromCode, 
capacity);
                     }
-                    stringsFromCode = Arrays.copyOf(stringsFromCode, capacity);
+                    System.arraycopy(stringsFromCode, stringOffset, 
stringsFromCode, newOffset, stringLength);
                 }
-                System.arraycopy(stringsFromCode, stringOffset, 
stringsFromCode, newOffset, stringLength);
+                entriesForCodes[previousCode] = entryForCode | 
PREALLOCATED_SPACE_IS_USED_MASK;
                 /*
                  * Determine the sequence of bytes to write.
                  * Pseudo-codes in TIFF specification for the 2 conditional 
branches:
@@ -319,21 +387,23 @@ final class LZW extends CompressionChannel {
                  *     WriteString(StringFromCode(Code))
                  *     WriteString(<the entry to be added in the table>)
                  */
-                final int entryForCode = entriesForCodes[code];
+                final int newLength = stringLength + 1;
+                final int newEntry  = offsetAndLength(newOffset, newLength);
+                entryForCode = entriesForCodes[code];
                 if (entryForCode != 0) {                                       
 // if (IsInTable(Code))
                     stringOffset = offset(entryForCode);                       
 // StringFromCode(Code)
                     stringLength = length(entryForCode);
                 } else {
                     stringOffset = newOffset;       // StringFromCode(OldCode) 
+ FirstChar(StringFromCode(OldCode)
                     stringLength = newLength;
+                    entryForCode = newEntry;
                     /*
                      * In well-formed LZW stream, we should have `code == 
indexOfFreeEntry` here.
                      * However some invalid values are found in practice. We 
need `code` to refer
                      * to the entry that we add to the dictionary, otherwise 
inconsistencies will
-                     * happen during the next iteration (when using 
`OldCode`). This is implicitly
-                     * ensured by not using `code` directly in next iteration, 
but reusing instead
-                     * `stringOffset` and `stringLength`.
+                     * happen during the next iteration (when using 
`previousCode`).
                      */
+                    code = indexOfFreeEntry;
                 }
                 /*
                  * Add the missing byte in the new entry. That byte is 
`FirstChar(StringFromCode(Code | OldCode)))`.
@@ -342,10 +412,10 @@ final class LZW extends CompressionChannel {
                  * the copy done above), which is also the string that we are 
about to write. So at this point, the
                  * two branches can be executed by the same code.
                  */
+                assert stringsFromCode[newOffset] == 
stringsFromCode[offset(entriesForCodes[previousCode])] : code;
                 stringsFromCode[lastNewByte] = stringsFromCode[stringOffset];  
   // + FirstChar(StringFromCode(…))
-                indexOfFreeString = lastNewByte + 1;
                 try {
-                    entriesForCodes[indexOfFreeEntry] = 
offsetAndLength(newOffset, newLength);
+                    entriesForCodes[indexOfFreeEntry] = newEntry;
                 } catch (ArrayIndexOutOfBoundsException e) {
                     throw (IOException) unexpectedData().initCause(e);
                 }
@@ -370,13 +440,14 @@ final class LZW extends CompressionChannel {
                 if (stringLength <= target.remaining()) {
                     target.put(stringsFromCode, stringOffset, stringLength);
                 } else {
-                    pendingEntry = offsetAndLength(stringOffset, stringLength);
+                    pendingOffset = stringOffset;
+                    pendingLength = stringLength;
                     break;
                 }
             }
+            previousCode = code;            // OldCode = Code
         }
-        previousEntry = offsetAndLength(stringOffset, stringLength);
-        done = (code == EOI_CODE);
+        this.previousCode = code;
         return target.position() - start;
     }

Reply via email to