This is an automated email from the ASF dual-hosted git repository.
gabor pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-mr.git
The following commit(s) were added to refs/heads/master by this push:
new 5154d04 PARQUET-2073: Fix estimate remaining row count in
ColumnWriteStoreBase. (#922)
5154d04 is described below
commit 5154d04f1ccc21811c0668bf68cfd19f91100907
Author: Jiang Yang <[email protected]>
AuthorDate: Mon Aug 16 17:23:04 2021 +0800
PARQUET-2073: Fix estimate remaining row count in ColumnWriteStoreBase.
(#922)
---
.../main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java | 3 ++-
1 file changed, 2 insertions(+), 1 deletion(-)
diff --git a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
index a5e7836..8cfdace 100644
--- a/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
+++ b/parquet-column/src/main/java/org/apache/parquet/column/impl/ColumnWriteStoreBase.java
@@ -237,10 +237,11 @@ abstract class ColumnWriteStoreBase implements ColumnWriteStore {
} else {
rowCountForNextRowCountCheck = min(rowCountForNextRowCountCheck,
writer.getRowsWrittenSoFar() + pageRowCountLimit);
}
+ //estimate remaining row count by previous input for next row count check
long rowsToFillPage =
usedMem == 0 ?
props.getMaxRowCountForPageSizeCheck()
- : (long) rows / usedMem * remainingMem;
+ : rows * remainingMem / usedMem;
if (rowsToFillPage < minRecordToWait) {
minRecordToWait = rowsToFillPage;
}