This is an automated email from the ASF dual-hosted git repository. desruisseaux pushed a commit to branch geoapi-4.0 in repository https://gitbox.apache.org/repos/asf/sis.git
commit 35b5576064b43b90df8959e4e71adc1491c6b734 Author: Martin Desruisseaux <[email protected]> AuthorDate: Fri Jul 8 13:04:34 2022 +0200 Allocate memory for new entries by blocks of 4 bytes instead of doing an unconditional allocation for each new entry. It reduces the amount of calls to `System.arraycopy(…)` by a factor close to 4 and also reduces memory consumption. The performance improvement was not as expected; this new version is even a bit slower than the one in previous commit. It is a little bit surprising because a code block with branching, bounds checks and copies should be executed less often (the `if (allocate) {…}` block), and the cost in the rest of the code is mostly a few bit twiddling, which should be cheap. Maybe it is related to the way HotSpot tries to optimize, in which case the benchmark may change in any future Java version. We keep this new LZW version for now because of its lower memory usage (almost a factor 4). --- .../apache/sis/internal/storage/inflater/LZW.java | 195 ++++++++++++++------- 1 file changed, 133 insertions(+), 62 deletions(-) diff --git a/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/LZW.java b/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/LZW.java index a1b37f7e01..1431d28afc 100644 --- a/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/LZW.java +++ b/storage/sis-geotiff/src/main/java/org/apache/sis/internal/storage/inflater/LZW.java @@ -96,6 +96,40 @@ final class LZW extends CompressionChannel { */ private static final int LENGTH_MASK = (1 << LOWEST_OFFSET_BIT) - 1; + /** + * Number of bits in an offset that are always 0 and consequently do not need to be stored. + * An intentional consequence of this restriction is that size of blocks allocated in the + * {@link #stringsFromCode} array must be multiples of {@literal (1 << STRING_ALIGNMENT)}. + * It makes possible to use the extra size for growing a string up to that amount of bytes + * without copying it. + * + * <div class="note"><b>Note:</b> + * doing allocations only by blocks of 2² = 4 bytes may seem a waste of memory, but actually + * it reduces memory usage a lot (almost a factor 4) because of the copies avoided. + * We tried with alignment values 1, 2, 3 and found that 2 seems optimal.</div> + */ + private static final int STRING_ALIGNMENT = 2; + + /** + * Mask for a bit in an {@link #entriesForCodes} element for telling whether the extra space allocated in the + * {@link #stringsFromCode} array has already been used by another entry. If yes (1), then that space can not + * be used by new entry. Instead, the new entry will need to allocate a new space. + * + * <p>Note: {@link #newEntryNeedsAllocation(int)} implementation assumes that this bit is the sign bit.</p> + */ + private static final int PREALLOCATED_SPACE_IS_USED_MASK = 1 << (Integer.SIZE - 1); + + /** + * The mask to apply on an {@link #entriesForCodes} element for getting the compressed offset (before shifting). + */ + private static final int OFFSET_MASK = (PREALLOCATED_SPACE_IS_USED_MASK - 1) & ~LENGTH_MASK; + + /** + * The shift to apply on a compressed offset (after application of {@link #OFFSET_MASK}) + * for getting the uncompressed offset. + */ + private static final int OFFSET_SHIFT = LOWEST_OFFSET_BIT - STRING_ALIGNMENT; + /** * Extracts the number of bytes of an entry stored in the {@link #stringsFromCode} array. * @@ -113,23 +147,49 @@ final class LZW extends CompressionChannel { * @return index of the first byte to read in {@link #stringsFromCode} array. */ private static int offset(final int element) { - return element >>> LOWEST_OFFSET_BIT; + return (element & OFFSET_MASK) >>> OFFSET_SHIFT; } /** * Encodes an offset together with its length. */ private static int offsetAndLength(final int offset, final int length) { - final int element = (offset << LOWEST_OFFSET_BIT) | length; + final int element = (offset << OFFSET_SHIFT) | length; assert offset(element) == offset : offset; assert length(element) == length : length; return element; } /** - * Maximal value + 1 that the offset can take. The offset takes all the bits after the length. + * Maximal value + 1 that the offset can take. The compressed offset takes all the bits after the length, + * minus one bit that we keep for the {@link #PREALLOCATED_SPACE_IS_USED_MASK} flag. Note that compressed + * offsets are multiplied by {@literal 1 << STRING_ALIGNMENT} for getting the actual offset. + */ + private static final int OFFSET_LIMIT = 1 << (Integer.SIZE - 1 - OFFSET_SHIFT); + + /** + * A mask used for detecting when a new allocation is required. + * If {@code (length & LENGTH_MASK_FOR_ALLOCATE) == 0} and assuming that + * length is always incremented by 1, then a new allocation is necessary. */ - private static final int OFFSET_LIMIT = 1 << (Integer.SIZE - LOWEST_OFFSET_BIT); + private static final int LENGTH_MASK_FOR_ALLOCATE = (1 << STRING_ALIGNMENT) - 1; + + /** + * Returns {@code true} if all the space allocated for the given entry is already used. + * This is true if at least one of the following conditions is true: + * + * <ul> + * <li>The {@link #PREALLOCATED_SPACE_IS_USED_MASK} is set, in which case value is negative.</li> + * <li>All the extra-space allowed by {@link #STRING_ALIGNMENT} is used, in which case the lowest + * bits of the length are all zero.</li> + * </ul> + * + * @param element an element of the {@link #entriesForCodes} array. + * @return whether all the space for that entry is already used. + */ + private static boolean newEntryNeedsAllocation(final int element) { + return (element & (PREALLOCATED_SPACE_IS_USED_MASK | LENGTH_MASK_FOR_ALLOCATE)) <= 0; + } /** * Pointers to byte sequences for a code in the {@link #entriesForCodes} array. @@ -139,16 +199,16 @@ final class LZW extends CompressionChannel { private final int[] entriesForCodes; /** - * Offset and length of the sequence of bytes associated to the code in previous iteration. - * This is a value from {@link #entriesForCodes} array. + * Last code found in previous iteration. This is a valid index in the {@link #entriesForCodes} array. + * A {@link #EOI_CODE} value means that the decompression is finished. */ - private int previousEntry; + private int previousCode; /** * If some bytes could not be written in previous {@code read(…)} execution because the target buffer was full, - * offset and length of those bytes. Otherwise 0. This is a value from {@link #entriesForCodes} array. + * offset and length of those bytes. Otherwise 0. */ - private int pendingEntry; + private int pendingOffset, pendingLength; /** * Index of the next entry available in {@link #entriesForCodes}. @@ -169,11 +229,6 @@ final class LZW extends CompressionChannel { */ private byte[] stringsFromCode; - /** - * Whether the {@link #EOI_CODE} has been found. - */ - private boolean done; - /** * Creates a new channel which will decompress data from the given input. * The {@link #setInputRegion(long, long)} method must be invoked after construction @@ -184,11 +239,10 @@ final class LZW extends CompressionChannel { public LZW(final ChannelDataInput input) { super(input); indexOfFreeEntry = FIRST_ADAPTATIVE_CODE; - entriesForCodes = new int [(1 << MAX_CODE_SIZE) - OFFSET_TO_MAXIMUM]; - stringsFromCode = new byte[32 * 1024]; // Dynamically expanded if needed. + entriesForCodes = new int[(1 << MAX_CODE_SIZE) - OFFSET_TO_MAXIMUM]; + stringsFromCode = new byte[3 << (MAX_CODE_SIZE + STRING_ALIGNMENT)]; // Dynamically expanded if needed. for (int i=0; i < (1 << Byte.SIZE); i++) { - entriesForCodes[i] = (i << LOWEST_OFFSET_BIT) | 1; - stringsFromCode[i] = (byte) i; + stringsFromCode[i << STRING_ALIGNMENT] = (byte) i; } } @@ -203,18 +257,21 @@ final class LZW extends CompressionChannel { public void setInputRegion(final long start, final long byteCount) throws IOException { super.setInputRegion(start, byteCount); clearTable(); - previousEntry = 0; - pendingEntry = 0; - done = false; + previousCode = 0; + pendingOffset = 0; + pendingLength = 0; } /** * Clears the {@link #entriesForCodes} table. */ private void clearTable() { + for (int i=0; i<CLEAR_CODE; i++) { + entriesForCodes[i] = (i << LOWEST_OFFSET_BIT) | 1; + } Arrays.fill(entriesForCodes, FIRST_ADAPTATIVE_CODE, indexOfFreeEntry, 0); indexOfFreeEntry = FIRST_ADAPTATIVE_CODE; - indexOfFreeString = 1 << Byte.SIZE; + indexOfFreeString = (1 << Byte.SIZE) << STRING_ALIGNMENT; codeSize = MIN_CODE_SIZE; } @@ -228,40 +285,34 @@ final class LZW extends CompressionChannel { @Override public int read(final ByteBuffer target) throws IOException { final int start = target.position(); + int previousCode = this.previousCode; /* * If a previous invocation of this method was unable to write some data * because the target buffer was full, write these remaining data first. */ - if (pendingEntry != 0) { - final int remaining = target.remaining(); - final int stringOffset = offset(pendingEntry); - final int stringLength = length(pendingEntry); - target.put(stringsFromCode, stringOffset, Math.min(stringLength, remaining)); - if (stringLength <= remaining) { - pendingEntry = 0; - if (done) { - return stringLength; - } - } else { - pendingEntry = offsetAndLength(stringOffset + remaining, stringLength - remaining); - return remaining; // Can not write more than what we just wrote. + if (pendingLength != 0) { + final int n = Math.min(pendingLength, target.remaining()); + target.put(stringsFromCode, pendingOffset, n); + pendingOffset += n; + pendingLength -= n; + if (pendingLength != 0 || previousCode == EOI_CODE) { + return n; } - } else if (done |= finished()) { + } else if (previousCode == EOI_CODE || finished()) { return -1; } /* * Below is adapted from TIFF version 6 specification, section 13, LZW Decoding. * Comments such as "InitializeTable()" refer to methods in TIFF specification. - * The body is a little bit more complex because codes in [0 … 255] range are - * handled as a special case instead of stored in the `entriesForCodes` table. */ - int stringOffset = offset(previousEntry); - int stringLength = length(previousEntry); + int entryForCode = entriesForCodes[previousCode]; + int stringOffset = offset(entryForCode); + int stringLength = length(entryForCode); int maximumIndex = (1 << codeSize) - OFFSET_TO_MAXIMUM; int code; while ((code = (int) input.readBits(codeSize)) != EOI_CODE) { // GetNextCode() if (code == CLEAR_CODE) { - clearTable(); // InitializeTable() + clearTable(); // InitializeTable() maximumIndex = (1 << MIN_CODE_SIZE) - OFFSET_TO_MAXIMUM; /* * We should not have consecutive clear codes, but it is easy to check for safety. @@ -281,14 +332,21 @@ final class LZW extends CompressionChannel { * but for a single byte. */ stringLength = 1; - stringOffset = code; // OldCode = Code + stringOffset = code << STRING_ALIGNMENT; + entryForCode = entriesForCodes[code]; + assert offsetAndLength(stringOffset, 1) == entryForCode : code; + assert Byte.toUnsignedInt(stringsFromCode[stringOffset]) == code : code; if (target.hasRemaining()) { - target.put((byte) code); // WriteString(StringFromCode(Code)) + target.put((byte) code); // WriteString(StringFromCode(Code)) } else { - pendingEntry = offsetAndLength(code, 1); + pendingOffset = stringOffset; + pendingLength = stringLength; break; } } else { + assert entryForCode == entriesForCodes[previousCode] : previousCode; + assert stringOffset == offset(entryForCode) : stringOffset; + assert stringLength == length(entryForCode) : stringLength; /* * Cases for all codes after the first code following `CLEAR_CODE`. * Those cases will add a new entry in the `stringsFromCode` table. @@ -300,18 +358,28 @@ final class LZW extends CompressionChannel { * Those two branches are identical except for the last argument (Code or OldCode). * We do the common part now before `stringOffset` and `stringLength` are modified. * The last byte will be added later, after we determined its value. + * + * Conceptually, creating a new entry requires copying the old entry before to append a byte. + * However we try to avoid some copies by pre-allocating extra spaces with each new entry. + * The `PREALLOCATED_SPACE_IS_USED_MASK` bit tells if we can append a byte in-place after + * `OldCode` (without copy). */ - final int newOffset = indexOfFreeString; - final int newLength = stringLength + 1; - final int lastNewByte = newOffset + stringLength; - if (lastNewByte >= stringsFromCode.length) { - final int capacity = Math.min(indexOfFreeString * 2, OFFSET_LIMIT); - if (lastNewByte >= capacity) { - throw new IOException("Dictionary overflow"); + final boolean allocate = newEntryNeedsAllocation(entryForCode); + final int newOffset = allocate ? indexOfFreeString : stringOffset; + final int lastNewByte = newOffset + stringLength; + if (allocate) { + indexOfFreeString = lastNewByte + (~lastNewByte & LENGTH_MASK_FOR_ALLOCATE) + 1; + assert (indexOfFreeString & LENGTH_MASK_FOR_ALLOCATE) == 0 : stringLength; + if (indexOfFreeString > stringsFromCode.length) { + final int capacity = Math.min(indexOfFreeString * 2, OFFSET_LIMIT); + if (indexOfFreeString >= capacity) { + throw new IOException("Dictionary overflow"); + } + stringsFromCode = Arrays.copyOf(stringsFromCode, capacity); } - stringsFromCode = Arrays.copyOf(stringsFromCode, capacity); + System.arraycopy(stringsFromCode, stringOffset, stringsFromCode, newOffset, stringLength); } - System.arraycopy(stringsFromCode, stringOffset, stringsFromCode, newOffset, stringLength); + entriesForCodes[previousCode] = entryForCode | PREALLOCATED_SPACE_IS_USED_MASK; /* * Determine the sequence of bytes to write. * Pseudo-codes in TIFF specification for the 2 conditional branches: @@ -319,21 +387,23 @@ final class LZW extends CompressionChannel { * WriteString(StringFromCode(Code)) * WriteString(<the entry to be added in the table>) */ - final int entryForCode = entriesForCodes[code]; + final int newLength = stringLength + 1; + final int newEntry = offsetAndLength(newOffset, newLength); + entryForCode = entriesForCodes[code]; if (entryForCode != 0) { // if (IsInTable(Code)) stringOffset = offset(entryForCode); // StringFromCode(Code) stringLength = length(entryForCode); } else { stringOffset = newOffset; // StringFromCode(OldCode) + FirstChar(StringFromCode(OldCode) stringLength = newLength; + entryForCode = newEntry; /* * In well-formed LZW stream, we should have `code == indexOfFreeEntry` here. * However some invalid values are found in practice. We need `code` to refer * to the entry that we add to the dictionary, otherwise inconsistencies will - * happen during the next iteration (when using `OldCode`). This is implicitly - * ensured by not using `code` directly in next iteration, but reusing instead - * `stringOffset` and `stringLength`. + * happen during the next iteration (when using `previousCode`). */ + code = indexOfFreeEntry; } /* * Add the missing byte in the new entry. That byte is `FirstChar(StringFromCode(Code | OldCode)))`. @@ -342,10 +412,10 @@ final class LZW extends CompressionChannel { * the copy done above), which is also the string that we are about to write. So at this point, the * two branches can be executed by the same code. */ + assert stringsFromCode[newOffset] == stringsFromCode[offset(entriesForCodes[previousCode])] : code; stringsFromCode[lastNewByte] = stringsFromCode[stringOffset]; // + FirstChar(StringFromCode(…)) - indexOfFreeString = lastNewByte + 1; try { - entriesForCodes[indexOfFreeEntry] = offsetAndLength(newOffset, newLength); + entriesForCodes[indexOfFreeEntry] = newEntry; } catch (ArrayIndexOutOfBoundsException e) { throw (IOException) unexpectedData().initCause(e); } @@ -370,13 +440,14 @@ final class LZW extends CompressionChannel { if (stringLength <= target.remaining()) { target.put(stringsFromCode, stringOffset, stringLength); } else { - pendingEntry = offsetAndLength(stringOffset, stringLength); + pendingOffset = stringOffset; + pendingLength = stringLength; break; } } + previousCode = code; // OldCode = Code } - previousEntry = offsetAndLength(stringOffset, stringLength); - done = (code == EOI_CODE); + this.previousCode = code; return target.position() - start; }
