This is an automated email from the ASF dual-hosted git repository.
gangwu pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/parquet-java.git
The following commit(s) were added to refs/heads/master by this push:
new 976e2d23b GH-3163: Reduce memory and time overhead of ParquetRewriterTests (#3164)
976e2d23b is described below
commit 976e2d23b37a389f58520d0dc9cdf9936fdf6825
Author: Rahul Sharma <[email protected]>
AuthorDate: Tue Mar 4 15:41:44 2025 +0100
GH-3163: Reduce memory and time overhead of ParquetRewriterTests (#3164)
---
.../hadoop/rewrite/ParquetRewriterTest.java | 28 +++++++++++++++-------
1 file changed, 19 insertions(+), 9 deletions(-)
diff --git a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/rewrite/ParquetRewriterTest.java b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/rewrite/ParquetRewriterTest.java
index c1da97c40..a2cb72176 100644
--- a/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/rewrite/ParquetRewriterTest.java
+++ b/parquet-hadoop/src/test/java/org/apache/parquet/hadoop/rewrite/ParquetRewriterTest.java
@@ -107,8 +107,8 @@ import org.junit.runners.Parameterized;
@RunWith(Parameterized.class)
public class ParquetRewriterTest {
- private final int numRecord = 100000;
- private final Configuration conf = new Configuration();
+ private final int numRecord;
+ private final Configuration conf;
private final ParquetConfiguration parquetConf = new PlainParquetConfiguration();
private final ParquetProperties.WriterVersion writerVersion;
private final IndexCache.CacheStrategy indexCacheStrategy;
@@ -122,21 +122,31 @@ public class ParquetRewriterTest {
private final EncryptionTestFile gzipEncryptionTestFileWithoutBloomFilterColumn;
private final EncryptionTestFile uncompressedEncryptionTestFileWithoutBloomFilterColumn;
- @Parameterized.Parameters(name = "WriterVersion = {0}, IndexCacheStrategy = {1}, UsingHadoop = {2}")
+ @Parameterized.Parameters(
+ name =
+ "WriterVersion = {0}, IndexCacheStrategy = {1}, UsingHadoop = {2}, numRecord = {3}, rowsPerPage = {4}")
public static Object[][] parameters() {
+ final int DefaultNumRecord = 10000;
+ final int DefaultRowsPerPage = DefaultNumRecord / 5;
return new Object[][] {
- {"v1", "NONE", true},
- {"v1", "PREFETCH_BLOCK", true},
- {"v2", "PREFETCH_BLOCK", true},
- {"v2", "PREFETCH_BLOCK", false}
+ {"v1", "NONE", true, DefaultNumRecord, DefaultRowsPerPage},
+ {"v1", "PREFETCH_BLOCK", true, DefaultNumRecord, DefaultRowsPerPage},
+ {"v2", "PREFETCH_BLOCK", true, DefaultNumRecord, DefaultRowsPerPage},
+ {"v2", "PREFETCH_BLOCK", false, DefaultNumRecord, DefaultRowsPerPage}
};
}
- public ParquetRewriterTest(String writerVersion, String indexCacheStrategy, boolean usingHadoop)
+ public ParquetRewriterTest(
+ String writerVersion, String indexCacheStrategy, boolean _usingHadoop, int _numRecord, int rowsPerPage)
throws IOException {
this.writerVersion = ParquetProperties.WriterVersion.fromString(writerVersion);
this.indexCacheStrategy = IndexCache.CacheStrategy.valueOf(indexCacheStrategy);
- this.usingHadoop = usingHadoop;
+ this.usingHadoop = _usingHadoop;
+ this.numRecord = _numRecord;
+
+ Configuration _conf = new Configuration();
+ _conf.set("parquet.page.row.count.limit", Integer.toString(rowsPerPage));
+ this.conf = _conf;
MessageType testSchema = createSchema();
this.gzipEncryptionTestFileWithoutBloomFilterColumn = new TestFileBuilder(conf, testSchema)