nsivabalan commented on code in PR #6883:
URL: https://github.com/apache/hudi/pull/6883#discussion_r989615334
##########
hudi-client/hudi-spark-client/src/test/java/org/apache/hudi/io/storage/row/TestHoodieInternalRowParquetWriter.java:
##########
@@ -68,44 +72,55 @@ public void tearDown() throws Exception {
@ParameterizedTest
@ValueSource(booleans = {true, false})
- public void endToEndTest(boolean parquetWriteLegacyFormatEnabled) throws
Exception {
+ public void testProperWriting(boolean parquetWriteLegacyFormatEnabled)
throws Exception {
+ // Generate inputs
+ Dataset<Row> inputRows = SparkDatasetTestUtils.getRandomRows(sqlContext,
100,
+ HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH, false);
+ StructType schema = inputRows.schema();
+
+ List<InternalRow> rows = SparkDatasetTestUtils.toInternalRows(inputRows,
SparkDatasetTestUtils.ENCODER);
+
HoodieWriteConfig.Builder writeConfigBuilder =
SparkDatasetTestUtils.getConfigBuilder(basePath, timelineServicePort);
- for (int i = 0; i < 5; i++) {
- // init write support and parquet config
- HoodieRowParquetWriteSupport writeSupport =
getWriteSupport(writeConfigBuilder, hadoopConf,
parquetWriteLegacyFormatEnabled);
- HoodieWriteConfig cfg = writeConfigBuilder.build();
- HoodieParquetConfig<HoodieRowParquetWriteSupport> parquetConfig = new
HoodieParquetConfig<>(writeSupport,
- CompressionCodecName.SNAPPY, cfg.getParquetBlockSize(),
cfg.getParquetPageSize(), cfg.getParquetMaxFileSize(),
- writeSupport.getHadoopConf(), cfg.getParquetCompressionRatio(),
cfg.parquetDictionaryEnabled());
-
- // prepare path
- String fileId = UUID.randomUUID().toString();
- Path filePath = new Path(basePath + "/" + fileId);
- String partitionPath =
HoodieTestDataGenerator.DEFAULT_FIRST_PARTITION_PATH;
- metaClient.getFs().mkdirs(new Path(basePath));
-
- // init writer
- HoodieInternalRowParquetWriter writer = new
HoodieInternalRowParquetWriter(filePath, parquetConfig);
-
- // generate input
- int size = 10 + RANDOM.nextInt(100);
- // Generate inputs
- Dataset<Row> inputRows = SparkDatasetTestUtils.getRandomRows(sqlContext,
size, partitionPath, false);
- List<InternalRow> internalRows =
SparkDatasetTestUtils.toInternalRows(inputRows, SparkDatasetTestUtils.ENCODER);
-
- // issue writes
- for (InternalRow internalRow : internalRows) {
- writer.write(internalRow);
- }
- // close the writer
- writer.close();
+ HoodieRowParquetWriteSupport writeSupport =
getWriteSupport(writeConfigBuilder, hadoopConf,
parquetWriteLegacyFormatEnabled);
+ HoodieWriteConfig cfg = writeConfigBuilder.build();
+ HoodieParquetConfig<HoodieRowParquetWriteSupport> parquetConfig = new
HoodieParquetConfig<>(writeSupport,
+ CompressionCodecName.SNAPPY, cfg.getParquetBlockSize(),
cfg.getParquetPageSize(), cfg.getParquetMaxFileSize(),
Review Comment:
Can we use the default compression codec from our config values instead of hard-coding SNAPPY? That way, if we ever switch the default codec, these tests will automatically cover the new one.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]