This is an automated email from the ASF dual-hosted git repository.

shangxinli pushed a commit to branch parquet-1.12.x
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git


The following commit(s) were added to refs/heads/parquet-1.12.x by this push:
     new 6e72dd4  PARQUET-2073: Fix estimate remaining row count in ColumnWriteStoreBase. (#922)
6e72dd4 is described below

commit 6e72dd463aed5eeabb87090cb200695755cd6a92
Author: Jiang Yang <[email protected]>
AuthorDate: Mon Aug 16 17:23:04 2021 +0800

    PARQUET-2073: Fix estimate remaining row count in ColumnWriteStoreBase. (#922)
---
 .../main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java | 3 ++-
 1 file changed, 2 insertions(+), 1 deletion(-)

diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
index a5e7836..8cfdace 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
@@ -237,10 +237,11 @@ abstract class ColumnWriteStoreBase implements ColumnWriteStore {
       } else {
         rowCountForNextRowCountCheck = min(rowCountForNextRowCountCheck, writer.getRowsWrittenSoFar() + pageRowCountLimit);
       }
+      //estimate remaining row count by previous input for next row count check
       long rowsToFillPage =
           usedMem == 0 ?
               props.getMaxRowCountForPageSizeCheck()
-              : (long) rows / usedMem * remainingMem;
+              : rows * remainingMem / usedMem;
       if (rowsToFillPage < minRecordToWait) {
         minRecordToWait = rowsToFillPage;
       }

Reply via email to