wgtmac commented on code in PR #1275:
URL: https://github.com/apache/orc/pull/1275#discussion_r1002858070


##########
c++/src/io/OutputStream.cc:
##########
@@ -95,9 +91,30 @@ namespace orc {
 
   uint64_t BufferedOutputStream::flush() {
     uint64_t dataSize = dataBuffer->size();
+    // flush data buffer into outputStream
+    if (dataBuffer->getBlockNumber() > 0)
     {
-      SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, IOCount);
-      outputStream->write(dataBuffer->data(), dataSize);
+      uint64_t ioCount = 0;
+      SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, nullptr);
+      // try to merge adjacent IO requests
+      BlockBuffer::Block mergeBlock = dataBuffer->getBlock(0);
+      for (uint64_t i = 1; i < dataBuffer->getBlockNumber(); ++i) {
+        auto curBlock = dataBuffer->getBlock(i);
+        if (mergeBlock.data + mergeBlock.size == curBlock.data) {
+          mergeBlock.size += curBlock.size;
+        } else {
+          outputStream->write(mergeBlock.data, mergeBlock.size);

Review Comment:
   We may also want to limit each write so that it does not exceed
`outputStream->getNaturalWriteSize()`.



##########
c++/src/io/OutputStream.cc:
##########
@@ -95,9 +91,30 @@ namespace orc {
 
   uint64_t BufferedOutputStream::flush() {
     uint64_t dataSize = dataBuffer->size();
+    // flush data buffer into outputStream
+    if (dataBuffer->getBlockNumber() > 0)
     {
-      SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, IOCount);
-      outputStream->write(dataBuffer->data(), dataSize);
+      uint64_t ioCount = 0;
+      SCOPED_STOPWATCH(metrics, IOBlockingLatencyUs, nullptr);
+      // try to merge adjacent IO requests
+      BlockBuffer::Block mergeBlock = dataBuffer->getBlock(0);
+      for (uint64_t i = 1; i < dataBuffer->getBlockNumber(); ++i) {
+        auto curBlock = dataBuffer->getBlock(i);
+        if (mergeBlock.data + mergeBlock.size == curBlock.data) {

Review Comment:
   This merge only happens when the memory allocator happens to place consecutive blocks contiguously in memory, so it may never actually occur.



##########
c++/src/io/OutputStream.cc:
##########
@@ -37,7 +37,7 @@ namespace orc {
                                     : outputStream(outStream),
                                       blockSize(blockSize_),
                                       metrics(metrics_) {
-    dataBuffer.reset(new DataBuffer<char>(pool));
+    dataBuffer.reset(new BlockBuffer(pool, blockSize));

Review Comment:
   OK, that makes sense.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to