This is an automated email from the ASF dual-hosted git repository.
desruisseaux pushed a commit to branch geoapi-4.0
in repository https://gitbox.apache.org/repos/asf/sis.git
The following commit(s) were added to refs/heads/geoapi-4.0 by this push:
new ba3b3c577b Performance tuning: use the ByteBuffer API for copying
block of data in `CopyFromBytes`. The previous code was assuming that the
amount of bytes to copy is small (typically 1), but that assumption become
wrong when the tile width is larger that the buffer capacity. This commit also
increases the default buffer size.
ba3b3c577b is described below
commit ba3b3c577b239326e1a6aade3239ffe2227fcf06
Author: Martin Desruisseaux <[email protected]>
AuthorDate: Mon Nov 6 11:31:46 2023 +0100
Performance tuning: use the ByteBuffer API for copying block of data in
`CopyFromBytes`.
The previous code was assuming that the amount of bytes to copy is small
(typically 1),
but that assumption become wrong when the tile width is larger that the
buffer capacity.
This commit also increases the default buffer size.
---
.../storage/geotiff/inflater/CopyFromBytes.java | 91 +++++++++-------------
.../storage/geotiff/writer/CompressionChannel.java | 10 ++-
.../apache/sis/io/stream/FileCacheByteChannel.java | 5 +-
.../apache/sis/io/stream/RewindableLineReader.java | 2 +-
.../org/apache/sis/storage/StorageConnector.java | 32 ++++++--
.../main/org/apache/sis/storage/csv/Store.java | 6 +-
.../apache/sis/storage/wkt/FirstKeywordPeek.java | 4 +-
7 files changed, 78 insertions(+), 72 deletions(-)
diff --git
a/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/inflater/CopyFromBytes.java
b/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/inflater/CopyFromBytes.java
index 0ea6f0b333..003f3328c7 100644
---
a/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/inflater/CopyFromBytes.java
+++
b/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/inflater/CopyFromBytes.java
@@ -229,25 +229,20 @@ abstract class CopyFromBytes extends Inflater {
super.uncompressRow();
int skipIndex = 0;
final ByteBuffer source = input.buffer;
- for (int i = chunksPerRow; --i > 0;) { // (chunksPerRow - 1)
iterations.
- int n = elementsPerChunk;
- input.ensureBufferContains(n);
- do bank.put(source.get()); // Number of
iterations should be low (often 1).
- while (--n != 0);
- if (skipAfterChunks != null) {
+ for (int i = chunksPerRow; --i >= 0;) {
+ input.ensureBufferContains(elementsPerChunk);
+ final int limit = source.limit();
+ source.limit(source.position() + elementsPerChunk);
+ bank.put(source);
+ source.limit(limit);
+ /*
+ * It is important to not skip `skipAfterChunks` sample values
on the last iteration.
+ * OTherwise EOF could be thrown if the last pixel is in the
last* column of the tile.
+ */
+ if (i != 0 && skipAfterChunks != null) {
skipIndex = skipAfterChunk(skipIndex);
}
}
- /*
- * Read the last chunk that was not read in above `for` loop, but
without skipping `skipAfterChunks`
- * sample values after. This is necessary for avoiding EOF if the
last pixel to read is in the last
- * column of the tile. The `elementsPerChunk` value here may be
large if `chunksPerRow` is 1.
- */
- input.ensureBufferContains(elementsPerChunk);
- final int limit = source.limit();
- source.limit(source.position() + elementsPerChunk);
- bank.put(source);
- source.limit(limit);
}
}
@@ -275,19 +270,15 @@ abstract class CopyFromBytes extends Inflater {
super.uncompressRow();
int skipIndex = 0;
final ByteBuffer source = input.buffer;
- for (int i = chunksPerRow; --i > 0;) {
- int n = elementsPerChunk;
- input.ensureBufferContains(n * Short.BYTES);
- do bank.put(source.getShort());
- while (--n != 0);
- if (skipAfterChunks != null) {
+ final int n = elementsPerChunk * Short.BYTES;
+ for (int i = chunksPerRow; --i >= 0;) {
+ input.ensureBufferContains(n);
+ bank.put(source.asShortBuffer().limit(elementsPerChunk));
+ source.position(source.position() + n);
+ if (i != 0 && skipAfterChunks != null) {
skipIndex = skipAfterChunk(skipIndex);
}
}
- final int n = elementsPerChunk * Short.BYTES;
- input.ensureBufferContains(n);
- bank.put(source.asShortBuffer().limit(elementsPerChunk));
- source.position(source.position() + n);
}
}
@@ -315,19 +306,15 @@ abstract class CopyFromBytes extends Inflater {
super.uncompressRow();
int skipIndex = 0;
final ByteBuffer source = input.buffer;
- for (int i = chunksPerRow; --i > 0;) {
- int n = elementsPerChunk;
- input.ensureBufferContains(n * Integer.BYTES);
- do bank.put(source.getInt());
- while (--n != 0);
- if (skipAfterChunks != null) {
+ final int n = elementsPerChunk * Integer.BYTES;
+ for (int i = chunksPerRow; --i >= 0;) {
+ input.ensureBufferContains(n);
+ bank.put(source.asIntBuffer().limit(elementsPerChunk));
+ source.position(source.position() + n);
+ if (i != 0 && skipAfterChunks != null) {
skipIndex = skipAfterChunk(skipIndex);
}
}
- final int n = elementsPerChunk * Integer.BYTES;
- input.ensureBufferContains(n);
- bank.put(source.asIntBuffer().limit(elementsPerChunk));
- source.position(source.position() + n);
}
}
@@ -355,19 +342,15 @@ abstract class CopyFromBytes extends Inflater {
super.uncompressRow();
int skipIndex = 0;
final ByteBuffer source = input.buffer;
- for (int i = chunksPerRow; --i > 0;) {
- int n = elementsPerChunk;
- input.ensureBufferContains(n * Float.BYTES);
- do bank.put(source.getFloat());
- while (--n != 0);
- if (skipAfterChunks != null) {
+ final int n = elementsPerChunk * Float.BYTES;
+ for (int i = chunksPerRow; --i >= 0;) {
+ input.ensureBufferContains(n);
+ bank.put(source.asFloatBuffer().limit(elementsPerChunk));
+ source.position(source.position() + n);
+ if (i != 0 && skipAfterChunks != null) {
skipIndex = skipAfterChunk(skipIndex);
}
}
- final int n = elementsPerChunk * Float.BYTES;
- input.ensureBufferContains(n);
- bank.put(source.asFloatBuffer().limit(elementsPerChunk));
- source.position(source.position() + n);
}
}
@@ -395,19 +378,15 @@ abstract class CopyFromBytes extends Inflater {
super.uncompressRow();
int skipIndex = 0;
final ByteBuffer source = input.buffer;
- for (int i = chunksPerRow; --i > 0;) {
- int n = elementsPerChunk;
- input.ensureBufferContains(n * Double.BYTES);
- do bank.put(source.getDouble());
- while (--n != 0);
- if (skipAfterChunks != null) {
+ final int n = elementsPerChunk * Double.BYTES;
+ for (int i = chunksPerRow; --i >= 0;) {
+ input.ensureBufferContains(n);
+ bank.put(source.asDoubleBuffer().limit(elementsPerChunk));
+ source.position(source.position() + n);
+ if (i != 0 && skipAfterChunks != null) {
skipIndex = skipAfterChunk(skipIndex);
}
}
- final int n = elementsPerChunk * Double.BYTES;
- input.ensureBufferContains(n);
- bank.put(source.asDoubleBuffer().limit(elementsPerChunk));
- source.position(source.position() + n);
}
}
}
diff --git
a/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/writer/CompressionChannel.java
b/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/writer/CompressionChannel.java
index cd74d11574..cd84ba6295 100644
---
a/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/writer/CompressionChannel.java
+++
b/endorsed/src/org.apache.sis.storage.geotiff/main/org/apache/sis/storage/geotiff/writer/CompressionChannel.java
@@ -31,6 +31,14 @@ import org.apache.sis.io.stream.ChannelDataOutput;
* @author Martin Desruisseaux (Geomatys)
*/
abstract class CompressionChannel extends PixelChannel {
+ /**
+ * Size of the buffer where to temporarily copy data to compress. The
buffer should be
+ * large enough for allowing compression algorithms and predictors to work
comfortably
+ * (potentially modifying the buffer content in-place). But a too large
value may also
+ * be counter-productive, maybe because it causes more frequent CPU cache
invalidation.
+ */
+ private static final int BUFFER_SIZE =
StorageConnector.DEFAULT_BUFFER_SIZE / 2;
+
/**
* The destination where to write compressed data.
*/
@@ -62,7 +70,7 @@ abstract class CompressionChannel extends PixelChannel {
* estimation because the length will usually be limited by the
maximal value below anyway.
* Those minimal and maximal capacity values are arbitrary.
*/
- return Math.max((int) Math.min(length,
StorageConnector.DEFAULT_BUFFER_SIZE / 2), 1024);
+ return Math.max((int) Math.min(length, BUFFER_SIZE), 1024);
}
/**
diff --git
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/io/stream/FileCacheByteChannel.java
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/io/stream/FileCacheByteChannel.java
index 9375504804..0e885b2003 100644
---
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/io/stream/FileCacheByteChannel.java
+++
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/io/stream/FileCacheByteChannel.java
@@ -60,8 +60,11 @@ import static
org.apache.sis.storage.base.StoreUtilities.LOGGER;
public abstract class FileCacheByteChannel extends ByteRangeChannel {
/**
* Size of the transfer buffer, in number of bytes.
+ * This value does not need to be as large as {@link
StorageConnector#DEFAULT_BUFFER_SIZE}
+ * because the buffer will be used only for transferring data, with no
computation done on
+ * the content.
*/
- private static final int BUFFER_SIZE =
StorageConnector.DEFAULT_BUFFER_SIZE;
+ private static final int BUFFER_SIZE =
StorageConnector.DEFAULT_BUFFER_SIZE / 4;
/**
* Threshold for implementing a change of position by closing current
connection and opening a new one.
diff --git
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/io/stream/RewindableLineReader.java
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/io/stream/RewindableLineReader.java
index 0657e96a97..0db4195ac1 100644
---
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/io/stream/RewindableLineReader.java
+++
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/io/stream/RewindableLineReader.java
@@ -45,7 +45,7 @@ public final class RewindableLineReader extends
LineNumberReader {
* This is also the maximal "read ahead limit" that can be passed to
{@link #mark(int)}
* without causing buffer reallocation.
*/
- public static final int BUFFER_SIZE = StorageConnector.DEFAULT_BUFFER_SIZE
/ 2;
+ private static final int BUFFER_SIZE = StorageConnector.READ_AHEAD_LIMIT;
/**
* The input stream, or {@code null} if this reader cannot rewind anymore.
diff --git
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/StorageConnector.java
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/StorageConnector.java
index a2bf92f40a..7d48090aee 100644
---
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/StorageConnector.java
+++
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/StorageConnector.java
@@ -120,24 +120,40 @@ public class StorageConnector implements Serializable {
* The default size (in bytes) of {@link ByteBuffer}s created by storage
connectors.
* Those buffers are typically created when the specified storage object
is a
* {@link File}, {@link Path}, {@link URL} or {@link URI}.
- * The default buffer size is arbitrary and may change in any future
Apache SIS version.
*
- * <p>Users can override this value by providing a value for {@link
OptionKey#BYTE_BUFFER}.</p>
+ * <p>The default buffer size is arbitrary and may change in any future
Apache SIS version.
+ * The current value is {@value}. Users can override this value by
providing a pre-allocated
+ * buffer with {@link OptionKey#BYTE_BUFFER}.</p>
*
* @see OptionKey#BYTE_BUFFER
*
* @since 1.4
*/
@Configuration
- public static final int DEFAULT_BUFFER_SIZE = 16 * 1024;
+ public static final int DEFAULT_BUFFER_SIZE = 32 * 1024;
/**
- * The read-ahead limit for mark operations.
- * We try to allow as many bytes as contained in buffers of default size.
- * For increasing the chances to meet that goal, this size should be the
- * same than {@link BufferedInputStream} default buffer size.
+ * The read-ahead limit for mark operations before probing input streams.
+ * When the storage is an {@link InputStream} or a {@link Reader}, the
{@link #getStorageAs(Class)} method
+ * marks the current position with this "read ahead limit" value for
resetting the stream to its original
+ * position if the caller does not recognize the stream content.
+ * Implementations of the {@link
DataStoreProvider#probeContent(StorageConnector)} method should avoid
+ * reading more bytes or characters from an {@code InputStream} or from a
{@code Reader} than this value.
+ *
+ * <p>The read ahead limit is arbitrary and may change in any future
Apache SIS version.
+ * However it is guaranteed to be smaller than or equal to {@link
#DEFAULT_BUFFER_SIZE}.
+ * The current value is {@value}.</p>
+ *
+ * <div class="note"><b>Note for maintainer:</b>
+ * this value should not be greater than the {@link BufferedInputStream}
default buffer size.</div>
+ *
+ * @see InputStream#mark(int)
+ * @see Reader#mark(int)
+ *
+ * @since 1.5
*/
- static final int READ_AHEAD_LIMIT = 8 * 1024;
+ @Configuration
+ public static final int READ_AHEAD_LIMIT = 8 * 1024;
/**
* The minimal size of the {@link ByteBuffer} to be created. This size is
used only
diff --git
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/csv/Store.java
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/csv/Store.java
index 013e2c1e8f..3ad230072c 100644
---
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/csv/Store.java
+++
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/csv/Store.java
@@ -241,7 +241,7 @@ final class Store extends URIDataStore implements
FeatureSet {
Foliation foliation = null;
try {
final List<String> elements = new ArrayList<>();
- source.mark(RewindableLineReader.BUFFER_SIZE);
+ source.mark(StorageConnector.READ_AHEAD_LIMIT);
String line;
while ((line = source.readLine()) != null) {
line = line.trim();
@@ -287,7 +287,7 @@ final class Store extends URIDataStore implements
FeatureSet {
}
}
elements.clear();
- source.mark(RewindableLineReader.BUFFER_SIZE);
+ source.mark(StorageConnector.READ_AHEAD_LIMIT);
}
source.reset(); // Restore position to the first
line after the header.
} catch (IOException e) {
@@ -328,7 +328,7 @@ final class Store extends URIDataStore implements
FeatureSet {
final char c = line.charAt(0);
if (c != COMMENT && c != METADATA) break;
}
- source.mark(RewindableLineReader.BUFFER_SIZE);
+ source.mark(StorageConnector.READ_AHEAD_LIMIT);
}
source.reset(); // Restore position to the first line
after the header.
}
diff --git
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/wkt/FirstKeywordPeek.java
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/wkt/FirstKeywordPeek.java
index 9829f6a2bc..4a6e610d22 100644
---
a/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/wkt/FirstKeywordPeek.java
+++
b/endorsed/src/org.apache.sis.storage/main/org/apache/sis/storage/wkt/FirstKeywordPeek.java
@@ -45,9 +45,9 @@ public abstract class FirstKeywordPeek {
/**
* The read-ahead limit when reading a text from a {@link Reader}.
- * Should be no more than {@code StorageConnector.DEFAULT_BUFFER_SIZE / 2}.
+ * Should not be greater than {@code StorageConnector.READ_AHEAD_LIMIT /
2}.
*/
- static final int READ_AHEAD_LIMIT = 2048;
+ static final int READ_AHEAD_LIMIT = StorageConnector.READ_AHEAD_LIMIT / 4;
/**
* The comment character to ignore.