This is an automated email from the ASF dual-hosted git repository.

gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git


The following commit(s) were added to refs/heads/master by this push:
     new 976e2d23b GH-3163: Reduce memory and time overhead of ParquetRewriterTests (#3164)
976e2d23b is described below

commit 976e2d23b37a389f58520d0dc9cdf9936fdf6825
Author: Rahul Sharma <[email protected]>
AuthorDate: Tue Mar 4 15:41:44 2025 +0100

    GH-3163: Reduce memory and time overhead of ParquetRewriterTests (#3164)
---
 .../hadoop/rewrite/ParquetRewriterTest.java        | 28 +++++++++++++++-------
 1 file changed, 19 insertions(+), 9 deletions(-)

diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/rewrite/ParquetRewriterTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/rewrite/ParquetRewriterTest.java
index c1da97c40..a2cb72176 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/rewrite/ParquetRewriterTest.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/rewrite/ParquetRewriterTest.java
@@ -107,8 +107,8 @@ import org.junit.runners.Parameterized;
 @RunWith(Parameterized.class)
 public class ParquetRewriterTest {
 
-  private final int numRecord = 100000;
-  private final Configuration conf = new Configuration();
+  private final int numRecord;
+  private final Configuration conf;
   private final ParquetConfiguration parquetConf = new PlainParquetConfiguration();
   private final ParquetProperties.WriterVersion writerVersion;
   private final IndexCache.CacheStrategy indexCacheStrategy;
@@ -122,21 +122,31 @@ public class ParquetRewriterTest {
   private final EncryptionTestFile gzipEncryptionTestFileWithoutBloomFilterColumn;
   private final EncryptionTestFile uncompressedEncryptionTestFileWithoutBloomFilterColumn;
 
-  @Parameterized.Parameters(name = "WriterVersion = {0}, IndexCacheStrategy = {1}, UsingHadoop = {2}")
+  @Parameterized.Parameters(
+      name =
+          "WriterVersion = {0}, IndexCacheStrategy = {1}, UsingHadoop = {2}, numRecord = {3}, rowsPerPage = {4}")
   public static Object[][] parameters() {
+    final int DefaultNumRecord = 10000;
+    final int DefaultRowsPerPage = DefaultNumRecord / 5;
     return new Object[][] {
-      {"v1", "NONE", true},
-      {"v1", "PREFETCH_BLOCK", true},
-      {"v2", "PREFETCH_BLOCK", true},
-      {"v2", "PREFETCH_BLOCK", false}
+      {"v1", "NONE", true, DefaultNumRecord, DefaultRowsPerPage},
+      {"v1", "PREFETCH_BLOCK", true, DefaultNumRecord, DefaultRowsPerPage},
+      {"v2", "PREFETCH_BLOCK", true, DefaultNumRecord, DefaultRowsPerPage},
+      {"v2", "PREFETCH_BLOCK", false, DefaultNumRecord, DefaultRowsPerPage}
     };
   }
 
-  public ParquetRewriterTest(String writerVersion, String indexCacheStrategy, boolean usingHadoop)
+  public ParquetRewriterTest(
+      String writerVersion, String indexCacheStrategy, boolean _usingHadoop, int _numRecord, int rowsPerPage)
       throws IOException {
     this.writerVersion = ParquetProperties.WriterVersion.fromString(writerVersion);
     this.indexCacheStrategy = IndexCache.CacheStrategy.valueOf(indexCacheStrategy);
-    this.usingHadoop = usingHadoop;
+    this.usingHadoop = _usingHadoop;
+    this.numRecord = _numRecord;
+
+    Configuration _conf = new Configuration();
+    _conf.set("parquet.page.row.count.limit", Integer.toString(rowsPerPage));
+    this.conf = _conf;
 
     MessageType testSchema = createSchema();
     this.gzipEncryptionTestFileWithoutBloomFilterColumn = new TestFileBuilder(conf, testSchema)

Reply via email to