Here is a patch for changes. The benchmark results follow.
diff --git a/src/org/tukaani/xz/lz/LZDecoder.java b/src/org/tukaani/xz/lz/LZDecoder.java index 85b2ca1..565209a 100644 --- a/src/org/tukaani/xz/lz/LZDecoder.java +++ b/src/org/tukaani/xz/lz/LZDecoder.java @@ -12,6 +12,7 @@ package org.tukaani.xz.lz; import java.io.DataInputStream; import java.io.IOException; + import org.tukaani.xz.ArrayCache; import org.tukaani.xz.CorruptedInputException; @@ -95,12 +96,39 @@ public final class LZDecoder { if (dist >= pos) back += bufSize; - do { - buf[pos++] = buf[back++]; + buf[pos++] = buf[back++]; + --left; + //then handle in bulk if there is work remaining + while (left > 0) { if (back == bufSize) back = 0; - } while (--left > 0); + //it is possible for the "repeat" to include content which is going + //to be generated here so we have to limit ourselves to how much + //data is already in the buffer (i.e. pos - back). It is also + //possible that "back" can actually be forward in the buffer from + //position, in which case that comparison does not matter + int toCopy = Math.min(left, bufSize - back); + int tmp = pos - back; + if (tmp < toCopy && tmp > 0) { + //if the delta between pos and back is smaller than how much + //there is to copy, then we can safely repeat it all the way + //for what is left + do { + System.arraycopy(buf, back, buf, pos, tmp); + pos += tmp; + left -= tmp; + tmp <<= 1; + } while (left > tmp); + } else { + + System.arraycopy(buf, back, buf, pos, toCopy); + pos += toCopy; + back += toCopy; + left -= toCopy; + } + } + if (full < pos) full = pos; } Here are benchmarks covering jdk 8, 11, and 15. This compares current TRUNK, which has crc64 changes compared to 1.8. For the "real" content, the improvements range from fairly small for the image (~5%), to ~15% for the large xml file. The artificial repeating byte improved by ~40%. jdk 8 64 bit TRUNK (file) Mode Cnt Score Error Units ihe_ovly_pr.dcm avgt 3 0.589 ± 0.007 ms/op image1.dcm avgt 3 384.959 ± 7.477 ms/op large.xml avgt 3 237.317 ± 14.152 ms/op 1 byte rep avgt 3 388.612 ± 12.811 ms/op After ihe_ovly_pr.dcm avgt 3 0.525 ± 0.022 ms/op image1.dcm avgt 3 371.327 ± 23.419 ms/op large.xml avgt 3 190.587 ± 9.545 ms/op 1 byte rep avgt 3 216.481 ± 3.936 ms/op jdk 11 64 bit TRUNK (file) Mode Cnt Score Error Units ihe_ovly_pr.dcm avgt 3 0.662 ± 0.012 ms/op image1.dcm avgt 3 391.644 ± 13.871 ms/op large.xml avgt 3 225.456 ± 16.265 ms/op 1 byte rep avgt 3 387.347 ± 18.811 ms/op After ihe_ovly_pr.dcm avgt 3 0.607 ± 0.187 ms/op image1.dcm avgt 3 369.419 ± 32.400 ms/op large.xml avgt 3 190.580 ± 7.856 ms/op 1 byte rep avgt 3 220.554 ± 8.812 ms/op jdk 15 64 bit TRUNK (file) Mode Cnt Score Error Units ihe_ovly_pr.dcm avgt 3 0.661 ± 0.034 ms/op image1.dcm avgt 3 375.150 ± 22.059 ms/op large.xml avgt 3 219.960 ± 7.347 ms/op 1 byte rep avgt 3 385.606 ± 13.185 ms/op After ihe_ovly_pr.dcm avgt 3 0.609 ± 0.139 ms/op image1.dcm avgt 3 367.665 ± 0.555 ms/op large.xml avgt 3 187.471 ± 56.143 ms/op 1 byte rep avgt 3 217.445 ± 16.682 ms/op jdk 8 32 bit (server) TRUNK (file) Mode Cnt Score Error Units ihe_ovly_pr.dcm avgt 3 0.627 ± 0.046 ms/op image1.dcm avgt 3 457.876 ± 28.759 ms/op large.xml avgt 3 279.929 ± 16.974 ms/op 1 byte rep avgt 3 490.610 ± 67.420 ms/op After ihe_ovly_pr.dcm avgt 3 0.561 ± 0.018 ms/op image1.dcm avgt 3 426.993 ± 12.331 ms/op large.xml avgt 3 228.273 ± 13.871 ms/op 1 byte rep avgt 3 309.623 ± 94.535 ms/op jdk 11 32 bit (server) TRUNK (file) Mode Cnt Score Error Units ihe_ovly_pr.dcm avgt 3 0.855 ± 0.058 ms/op image1.dcm avgt 3 655.695 ± 14.873 ms/op large.xml avgt 3 411.228 ± 3.691 ms/op 1 byte rep avgt 3 696.332 ± 35.806 ms/op After ihe_ovly_pr.dcm avgt 3 0.740 ± 0.011 ms/op image1.dcm avgt 3 616.774 ± 25.305 ms/op large.xml avgt 3 322.130 ± 4.882 ms/op 1 byte rep avgt 3 422.353 ± 17.333 ms/op