codope commented on code in PR #6587:
URL: https://github.com/apache/hudi/pull/6587#discussion_r962489007
##########
hudi-utilities/src/test/java/org/apache/hudi/utilities/sources/TestHoodieIncrSource.java:
##########
@@ -55,20 +66,39 @@ public class TestHoodieIncrSource extends
SparkClientFunctionalTestHarness {
private HoodieTestDataGenerator dataGen;
private HoodieTableMetaClient metaClient;
+ private HoodieTableType tableType = COPY_ON_WRITE;
@BeforeEach
public void setUp() throws IOException {
dataGen = new HoodieTestDataGenerator();
- metaClient = getHoodieMetaClient(hadoopConf(), basePath());
}
- @Test
- public void testHoodieIncrSource() throws IOException {
+ @Override
+ public HoodieTableMetaClient getHoodieMetaClient(Configuration hadoopConf,
String basePath, Properties props) throws IOException {
+ props = HoodieTableMetaClient.withPropertyBuilder()
+ .setTableName(RAW_TRIPS_TEST_NAME)
+ .setTableType(tableType)
+ .setPayloadClass(HoodieAvroPayload.class)
+ .fromProperties(props)
+ .build();
+ return HoodieTableMetaClient.initTableAndGetMetaClient(hadoopConf,
basePath, props);
+ }
+
+ private static Stream<Arguments> tableTypeParams() {
+ return Arrays.stream(new HoodieTableType[][]
{{HoodieTableType.COPY_ON_WRITE},
{HoodieTableType.MERGE_ON_READ}}).map(Arguments::of);
+ }
+
+ @ParameterizedTest
+ @MethodSource("tableTypeParams")
+ public void testHoodieIncrSource(HoodieTableType tableType) throws
IOException {
+ this.tableType = tableType;
+ metaClient = getHoodieMetaClient(hadoopConf(), basePath());
HoodieWriteConfig writeConfig = getConfigBuilder(basePath(), metaClient)
.withArchivalConfig(HoodieArchivalConfig.newBuilder().archiveCommitsWith(2,
3).build())
.withCleanConfig(HoodieCleanConfig.newBuilder().retainCommits(1).build())
+
.withCompactionConfig(HoodieCompactionConfig.newBuilder().withInlineCompaction(true).withMaxNumDeltaCommitsBeforeCompaction(3).build())
.withMetadataConfig(HoodieMetadataConfig.newBuilder()
- .withMaxNumDeltaCommitsBeforeCompaction(1).build())
+ .enable(false).build())
Review Comment:
Why false? Let's keep it default?
##########
hudi-utilities/src/main/java/org/apache/hudi/utilities/sources/helpers/IncrSourceHelper.java:
##########
@@ -73,7 +73,7 @@ public static Pair<String, Pair<String, String>>
calculateBeginAndEndInstants(Ja
HoodieTableMetaClient srcMetaClient =
HoodieTableMetaClient.builder().setConf(jssc.hadoopConfiguration()).setBasePath(srcBasePath).setLoadActiveTimelineOnLoad(true).build();
final HoodieTimeline activeCommitTimeline =
-
srcMetaClient.getActiveTimeline().getCommitTimeline().filterCompletedInstants();
+
srcMetaClient.getCommitsAndCompactionTimeline().filterCompletedInstants();
Review Comment:
Eventually, we should replace this API. Simply use
`metaClient.getActiveTimeline().getWriteTimeline()` as much as possible. I
don't think this API brings any real benefit apart from filtering out certain
types (deltacommit and compaction) for COW table. Anyway, such commits won't be
there for COW table and active timeline has already been loaded by that time.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]