stevenzwu commented on a change in pull request #3181:
URL: https://github.com/apache/iceberg/pull/3181#discussion_r800329610
##########
File path: parquet/src/main/java/org/apache/iceberg/parquet/Parquet.java
##########
@@ -291,32 +301,56 @@ private WriteBuilder createContextFunc(Function<Map<String, String>, Context> ne
private final int dictionaryPageSize;
private final CompressionCodecName codec;
private final String compressionLevel;
+ private final int rowGroupCheckMinRecordCount;
+ private final int rowGroupCheckMaxRecordCount;
private Context(int rowGroupSize, int pageSize, int dictionaryPageSize,
- CompressionCodecName codec, String compressionLevel) {
+ CompressionCodecName codec, String compressionLevel,
+ int rowGroupCheckMinRecordCount, int rowGroupCheckMaxRecordCount) {
this.rowGroupSize = rowGroupSize;
this.pageSize = pageSize;
this.dictionaryPageSize = dictionaryPageSize;
this.codec = codec;
this.compressionLevel = compressionLevel;
+ this.rowGroupCheckMinRecordCount = rowGroupCheckMinRecordCount;
+ this.rowGroupCheckMaxRecordCount = rowGroupCheckMaxRecordCount;
}
static Context dataContext(Map<String, String> config) {
- int rowGroupSize = Integer.parseInt(config.getOrDefault(
- PARQUET_ROW_GROUP_SIZE_BYTES, PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT));
+ int rowGroupSize = PropertyUtil.propertyAsInt(config,
+ PARQUET_ROW_GROUP_SIZE_BYTES, PARQUET_ROW_GROUP_SIZE_BYTES_DEFAULT);
+ Preconditions.checkArgument(rowGroupSize > 0,
+ "Row group size must be > 0");
Review comment:
fixed
##########
File path: parquet/src/test/java/org/apache/iceberg/parquet/TestParquet.java
##########
@@ -51,8 +54,15 @@
@Test
public void testRowGroupSizeConfigurable() throws IOException {
- // Without an explicit writer function
- File parquetFile = generateFileWithTwoRowGroups(null).first();
+ Map<String, String> props = ImmutableMap.of(
+ PARQUET_ROW_GROUP_SIZE_BYTES,
+ Integer.toString(4 * Integer.BYTES));
+ // Without an explicit writer function doesn't support PARQUET_ROW_GROUP_CHECK_MIN_RECORD_COUNT
+ // PARQUET_ROW_GROUP_CHECK_MAX_RECORD_COUNT configs.
+ // Even though row group size is 16 bytes, we still have to write 101 records
+ // as default PARQUET_ROW_GROUP_CHECK_MIN_RECORD_COUNT is 100.
+ File parquetFile = generateFileWithTwoRowGroups(null, 101, props)
+ .first();
Review comment:
fixed
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]