[ 
https://issues.apache.org/jira/browse/PARQUET-2357?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17774284#comment-17774284
 ] 

ASF GitHub Bot commented on PARQUET-2357:
-----------------------------------------

wgtmac commented on code in PR #1165:
URL: https://github.com/apache/parquet-mr/pull/1165#discussion_r1355929638


##########
parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java:
##########
@@ -193,18 +192,15 @@ private void addSlab(int minimumSize) {
     this.currentSlab = allocator.allocate(nextSlabSize);
     this.slabs.add(currentSlab);
     this.bytesAllocated = Math.addExact(this.bytesAllocated, nextSlabSize);
-    this.currentSlabIndex = 0;
   }
 
   @Override
   public void write(int b) {
     if (!currentSlab.hasRemaining()) {
       addSlab(1);
     }
-    currentSlab.put(currentSlabIndex, (byte) b);
-    currentSlabIndex += 1;
-    currentSlab.position(currentSlabIndex);
-    bytesUsed = Math.addExact(bytesUsed, 1);
+    currentSlab.put((byte) b);
+    bytesUsed += 1;

Review Comment:
   ```suggestion
       bytesUsed = Math.addExact(bytesUsed, 1);
   ```



##########
parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java:
##########
@@ -214,21 +210,16 @@ public void write(byte b[], int off, int len) {
       throw new IndexOutOfBoundsException(
           String.format("Given byte array of size %d, with requested 
length(%d) and offset(%d)", b.length, len, off));
     }
-    if (len >= currentSlab.remaining()) {
+    if (len > currentSlab.remaining()) {
       final int length1 = currentSlab.remaining();
       currentSlab.put(b, off, length1);
-      bytesUsed = Math.addExact(bytesUsed, length1);
-      currentSlabIndex += length1;
       final int length2 = len - length1;
       addSlab(length2);
       currentSlab.put(b, off + length1, length2);
-      currentSlabIndex = length2;
-      bytesUsed = Math.addExact(bytesUsed, length2);
     } else {
       currentSlab.put(b, off, len);
-      currentSlabIndex += len;
-      bytesUsed = Math.addExact(bytesUsed, len);
     }
+    bytesUsed += len;

Review Comment:
   ```suggestion
       bytesUsed = Math.addExact(bytesUsed, len);
   ```



##########
parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java:
##########
@@ -193,18 +192,15 @@ private void addSlab(int minimumSize) {
     this.currentSlab = allocator.allocate(nextSlabSize);
     this.slabs.add(currentSlab);
     this.bytesAllocated = Math.addExact(this.bytesAllocated, nextSlabSize);
-    this.currentSlabIndex = 0;
   }
 
   @Override
   public void write(int b) {
     if (!currentSlab.hasRemaining()) {
       addSlab(1);
     }
-    currentSlab.put(currentSlabIndex, (byte) b);
-    currentSlabIndex += 1;
-    currentSlab.position(currentSlabIndex);
-    bytesUsed = Math.addExact(bytesUsed, 1);
+    currentSlab.put((byte) b);
+    bytesUsed += 1;

Review Comment:
   It would be good to keep the overflow check, just in case.



##########
parquet-common/src/main/java/org/apache/parquet/bytes/CapacityByteArrayOutputStream.java:
##########
@@ -214,21 +210,16 @@ public void write(byte b[], int off, int len) {
       throw new IndexOutOfBoundsException(
           String.format("Given byte array of size %d, with requested 
length(%d) and offset(%d)", b.length, len, off));
     }
-    if (len >= currentSlab.remaining()) {
+    if (len > currentSlab.remaining()) {
       final int length1 = currentSlab.remaining();
       currentSlab.put(b, off, length1);
-      bytesUsed = Math.addExact(bytesUsed, length1);
-      currentSlabIndex += length1;
       final int length2 = len - length1;
       addSlab(length2);
       currentSlab.put(b, off + length1, length2);
-      currentSlabIndex = length2;
-      bytesUsed = Math.addExact(bytesUsed, length2);
     } else {
       currentSlab.put(b, off, len);
-      currentSlabIndex += len;
-      bytesUsed = Math.addExact(bytesUsed, len);
     }
+    bytesUsed += len;

Review Comment:
   ditto





> Modest refactor of CapacityByteArrayOutputStream
> ------------------------------------------------
>
>                 Key: PARQUET-2357
>                 URL: https://issues.apache.org/jira/browse/PARQUET-2357
>             Project: Parquet
>          Issue Type: Improvement
>          Components: parquet-mr
>            Reporter: Feng Jiajie
>            Priority: Minor
>             Fix For: 1.14.0
>
>
> Optimization for the CapacityByteArrayOutputStream class:
>  # The functionality of {{currentSlabIndex}} is the same as 
> {{{}currentSlab.position(){}}}, so there is no need to maintain the 
> {{currentSlabIndex}} variable.
>  # When writing an array of length equal to the remaining capacity of the 
> buffer, there is no need to expand to a new buffer.
>  # If the {{addSlab}} operation has already implemented safeguards using 
> {{Math.addExact}} to prevent overflow of {{bytesAllocated}} and 
> {{{}bytesUsed{}}}, it is unnecessary to perform additional checks during the 
> {{write}} operation.



--
This message was sent by Atlassian Jira
(v8.20.10#820010)

Reply via email to