steveloughran commented on pull request #2584:
URL: https://github.com/apache/hadoop/pull/2584#issuecomment-1086028402


   * rebase against trunk
   * s3a input stream will drain the inner stream asynchronously in 
seek/unbuffer related calls if the number of bytes to drain is greater than a 
new config/openFile option, `fs.s3a.input.async.drain.threshold`. The default 
is 16000 bytes, which seems a good number in my long-haul experiments. Draining 
is also done into a 16k byte buffer, which speeds it up.
   
   This aims to reduce the cost of seeking where bytes do need to be discarded.
   
   The time to drain is also measured. It's the max time which can be high, as 
it is the time to read the bytes remaining in the current read. Abort is less 
expensive in the actual abort... it's the negotiation of a new TLS channel 
later which costs.
   
   ```
   (stream_read_remote_stream_aborted.mean=(samples=2, sum=2, mean=1.0000))
   (stream_read_remote_stream_drain.mean=(samples=13, sum=29, mean=2.2308)));
   
   (stream_read_remote_stream_aborted.max=1)
   (stream_read_remote_stream_drain.max=25));
   ```
   
   The `ITestS3AInputStreamPerformance` suite also sets the file length in 
every openFile call, so it skips all creation and saves a few seconds overall, 
showing the saving is tangible.
   
   full stats from a remote testrun
   
   ```
   
   2022-04-01 15:49:10,887 [JUnit] INFO  s3a.AbstractS3ATestBase 
(AbstractS3ATestBase.java:dumpFileSystemIOStatistics(123)) - Aggregate 
FileSystem Statistics counters=((action_executor_acquired=1)
   (action_file_opened=8)
   (action_http_get_request=15)
   (action_http_head_request=25)
   (audit_request_execution=76)
   (audit_span_creation=50)
   (directories_created=8)
   (directories_deleted=7)
   (fake_directories_deleted=1)
   (files_created=1)
   (files_deleted=1)
   (object_bulk_delete_request=2)
   (object_delete_objects=9)
   (object_delete_request=7)
   (object_list_request=18)
   (object_metadata_request=25)
   (object_put_bytes=32768)
   (object_put_request=9)
   (object_put_request_completed=9)
   (op_create=1)
   (op_delete=8)
   (op_get_file_status=9)
   (op_mkdirs=8)
   (op_open=8)
   (store_io_request=78)
   (stream_aborted=2)
   (stream_read_bytes=93473433)
   (stream_read_bytes_backwards_on_seek=12713984)
   (stream_read_bytes_discarded_in_abort=43889622)
   (stream_read_bytes_discarded_in_close=252395)
   (stream_read_close_operations=8)
   (stream_read_closed=13)
   (stream_read_fully_operations=8)
   (stream_read_opened=15)
   (stream_read_operations=6124)
   (stream_read_operations_incomplete=6071)
   (stream_read_remote_stream_aborted=2)
   (stream_read_remote_stream_drain=13)
   (stream_read_seek_backward_operations=4)
   (stream_read_seek_bytes_discarded=45092691)
   (stream_read_seek_bytes_skipped=55054163)
   (stream_read_seek_forward_operations=175)
   (stream_read_seek_operations=179)
   (stream_read_seek_policy_changed=9)
   (stream_read_total_bytes=138818519)
   (stream_write_block_uploads=1)
   (stream_write_bytes=32768)
   (stream_write_total_data=65536));
   
   gauges=((stream_write_block_uploads_pending=1));
   
   minimums=((action_executor_acquired.min=0)
   (action_file_opened.min=0)
   (action_http_get_request.min=31)
   (action_http_head_request.min=21)
   (object_bulk_delete_request.min=37)
   (object_delete_request.min=28)
   (object_list_request.min=27)
   (object_put_request.min=60)
   (op_create.min=61)
   (op_delete.min=28)
   (op_get_file_status.min=35)
   (op_mkdirs.min=155)
   (stream_read_remote_stream_aborted.min=1)
   (stream_read_remote_stream_drain.min=0));
   
   maximums=((action_executor_acquired.max=0)
   (action_file_opened.max=0)
   (action_http_get_request.max=730)
   (action_http_head_request.max=1663)
   (object_bulk_delete_request.max=84)
   (object_delete_request.max=35)
   (object_list_request.max=648)
   (object_put_request.max=205)
   (op_create.max=61)
   (op_delete.max=159)
   (op_get_file_status.max=1669)
   (op_mkdirs.max=769)
   (stream_read_remote_stream_aborted.max=1)
   (stream_read_remote_stream_drain.max=25));
   
   means=((action_executor_acquired.mean=(samples=1, sum=0, mean=0.0000))
   (action_file_opened.mean=(samples=8, sum=0, mean=0.0000))
   (action_http_get_request.mean=(samples=15, sum=2752, mean=183.4667))
   (action_http_head_request.mean=(samples=25, sum=7360, mean=294.4000))
   (object_bulk_delete_request.mean=(samples=2, sum=121, mean=60.5000))
   (object_delete_request.mean=(samples=7, sum=213, mean=30.4286))
   (object_list_request.mean=(samples=18, sum=1520, mean=84.4444))
   (object_put_request.mean=(samples=9, sum=797, mean=88.5556))
   (op_create.mean=(samples=1, sum=61, mean=61.0000))
   (op_delete.mean=(samples=8, sum=373, mean=46.6250))
   (op_get_file_status.mean=(samples=9, sum=6793, mean=754.7778))
   (op_mkdirs.mean=(samples=8, sum=2006, mean=250.7500))
   (stream_read_remote_stream_aborted.mean=(samples=2, sum=2, mean=1.0000))
   (stream_read_remote_stream_drain.mean=(samples=13, sum=29, mean=2.2308)));
   
   ```


-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]



---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to