olaf-otto closed pull request #50: WAGON-537 Maven download speed of large
artifacts is slow
URL: https://github.com/apache/maven-wagon/pull/50
This is a PR merged from a forked repository.
As GitHub hides the original diff on merge, it is displayed below for
the sake of provenance:
As this is a foreign pull request (from a fork), the diff is supplied
below (as it won't show otherwise due to GitHub magic):
diff --git
a/wagon-provider-api/src/main/java/org/apache/maven/wagon/AbstractWagon.java
b/wagon-provider-api/src/main/java/org/apache/maven/wagon/AbstractWagon.java
index 4cbf37d7..d420ee80 100644
--- a/wagon-provider-api/src/main/java/org/apache/maven/wagon/AbstractWagon.java
+++ b/wagon-provider-api/src/main/java/org/apache/maven/wagon/AbstractWagon.java
@@ -44,6 +44,9 @@
import java.io.OutputStream;
import java.util.List;
+import static java.lang.Math.max;
+import static java.lang.Math.min;
+
/**
* Implementation of common facilities for Wagon providers.
*
@@ -53,6 +56,24 @@
implements Wagon
{
protected static final int DEFAULT_BUFFER_SIZE = 1024 * 4;
+ protected static final int MAXIMUM_BUFFER_SIZE = 1024 * 512;
+
+ /**
+ * To efficiently buffer data, use a multiple of 4k
+ * as this is likely to match the hardware buffer size of certain
+ * storage devices.
+ */
+ protected static final int BUFFER_SEGMENT_SIZE = 4 * 1024;
+
+ /**
+ * The desired minimum amount of chunks in which a {@link Resource} shall
be
+ * {@link #transfer(Resource, InputStream, OutputStream, int, long)
transferred}.
+ * This corresponds to the minimum times {@link
#fireTransferProgress(TransferEvent, byte[], int)}.
+ * 100 notifications is a conservative value that will lead to small
chunks for
+ * any artifact less that {@link #BUFFER_SEGMENT_SIZE} * {@link
#MINIMUM_AMOUNT_OF_TRANSFER_CHUNKS}
+ * in size.
+ */
+ protected static final int MINIMUM_AMOUNT_OF_TRANSFER_CHUNKS = 100;
protected Repository repository;
@@ -560,7 +581,7 @@ protected void transfer( Resource resource, InputStream
input, OutputStream outp
protected void transfer( Resource resource, InputStream input,
OutputStream output, int requestType, long maxSize )
throws IOException
{
- byte[] buffer = new byte[DEFAULT_BUFFER_SIZE];
+ byte[] buffer = bufferForTransferring( resource );
TransferEvent transferEvent = new TransferEvent( this, resource,
TransferEvent.TRANSFER_PROGRESS, requestType );
transferEvent.setTimestamp( System.currentTimeMillis() );
@@ -568,23 +589,72 @@ protected void transfer( Resource resource, InputStream
input, OutputStream outp
long remaining = maxSize;
while ( remaining > 0 )
{
- // let's safely cast to int because the min value will be lower
than the buffer size.
- int n = input.read( buffer, 0, (int) Math.min( buffer.length,
remaining ) );
+ // Read from the stream, block if necessary until either EOF or
buffer is filled.
+ // Filling the buffer has priority since downstream processors
will significantly degrade i/o
+ // performance if called to frequently (large data streams) as
they perform expensive tasks such as
+ // console output or data integrity checks.
+ int nextByte = input.read();
- if ( n == -1 )
+ if ( nextByte == -1 )
{
break;
}
- fireTransferProgress( transferEvent, buffer, n );
+ buffer[0] = ( byte ) nextByte;
- output.write( buffer, 0, n );
+ // let's safely cast to int because the min value will be lower
than the buffer size.
+ int length = (int) min( buffer.length, remaining ),
+ read = 1;
- remaining -= n;
+ for ( ; read < length ; ++read )
+ {
+ nextByte = input.read();
+ if ( nextByte == -1 )
+ {
+ break;
+ }
+ buffer[read] = ( byte ) nextByte;
+ }
+
+ fireTransferProgress( transferEvent, buffer, read );
+
+ output.write( buffer, 0, read );
+
+ remaining -= read;
}
output.flush();
}
+ /**
+ * Provide a buffer suitably sized for efficiently
+ * {@link #transfer(Resource, InputStream, OutputStream, int, long)
transferring}
+ * the given {@link Resource}. For larger files, larger buffers are
provided such that downstream
+ * {@link #fireTransferProgress(TransferEvent, byte[], int) listeners} are
not notified overly frequently.
+ * For instance, transferring gigabyte-sized resources would result in
millions of notifications when using
+ * only a few kilobytes of buffer, drastically slowing transfer since
transfer progress listeners and
+ * notifications are synchronous and may block, e.g. when writing download
progress status to console.
+ *
+ * @param resource must not be null.
+ * @return a byte buffer suitable for the {@link
Resource#getContentLength() content length} of the resource.
+ */
+ protected byte[] bufferForTransferring( Resource resource )
+ {
+ final long contentLength = resource.getContentLength();
+
+ if ( contentLength <= 0 )
+ {
+ return new byte[DEFAULT_BUFFER_SIZE];
+ }
+
+ final int numberOfBufferSegments = ( ( int ) (
+ contentLength / ( BUFFER_SEGMENT_SIZE *
MINIMUM_AMOUNT_OF_TRANSFER_CHUNKS ) )
+ );
+ final int potentialBufferSize = numberOfBufferSegments *
BUFFER_SEGMENT_SIZE;
+ final int effectiveBufferSize = min( MAXIMUM_BUFFER_SIZE, max(
DEFAULT_BUFFER_SIZE, potentialBufferSize ) );
+
+ return new byte[effectiveBufferSize];
+ }
+
// ----------------------------------------------------------------------
//
// ----------------------------------------------------------------------
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
users@infra.apache.org
With regards,
Apache Git Services