[
https://issues.apache.org/jira/browse/HUDI-2394?page=com.atlassian.jira.plugin.system.issuetabpanels:comment-tabpanel&focusedCommentId=17411590#comment-17411590
]
ASF GitHub Bot commented on HUDI-2394:
--------------------------------------
vinothchandar commented on a change in pull request #3592:
URL: https://github.com/apache/hudi/pull/3592#discussion_r703923530
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
##########
@@ -413,6 +414,17 @@
.withDocumentation("Whether to include '_hoodie_operation' in the
metadata fields. "
+ "Once enabled, all the changes of a record are persisted to the
delta log directly without merge");
+ public static final ConfigProperty<String> FILEID_PREFIX_PROVIDER_CLASS =
ConfigProperty
+ .key("hoodie.fileid.prefix.provider.class")
+ .defaultValue(RandomFileIdPrefixProvider.class.getName())
+ .withDocumentation("File Id Prefix provider class, that implements
`org.apache.hudi.fileid.FileIdPrefixProvider`");
+
+ public static final ConfigProperty<String> INSERT_AVOID_TRANSITION_INFLIGHT
= ConfigProperty
Review comment:
nts: could this be avoided
##########
File path:
hudi-client/hudi-client-common/src/main/java/org/apache/hudi/config/HoodieWriteConfig.java
##########
@@ -413,6 +414,17 @@
.withDocumentation("Whether to include '_hoodie_operation' in the
metadata fields. "
+ "Once enabled, all the changes of a record are persisted to the
delta log directly without merge");
+ public static final ConfigProperty<String> FILEID_PREFIX_PROVIDER_CLASS =
ConfigProperty
Review comment:
add `since` etc?
##########
File path:
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/table/action/commit/JavaBulkInsertHelper.java
##########
@@ -102,12 +106,16 @@ public static JavaBulkInsertHelper newInstance() {
:
JavaBulkInsertInternalPartitionerFactory.get(config.getBulkInsertSortMode());
repartitionedRecords = (List<HoodieRecord<T>>)
partitioner.repartitionRecords(dedupedRecords, parallelism);
- String idPfx = FSUtils.createNewFileIdPfx();
+ FileIdPrefixProvider fileIdPrefixProvider = (FileIdPrefixProvider)
ReflectionUtils.loadClass(
+ config.getFileIdPrefixProviderClassName(),
+ config.getProps());
List<WriteStatus> writeStatuses = new ArrayList<>();
- new JavaLazyInsertIterable<>(repartitionedRecords.iterator(), true,
config, instantTime, table, idPfx,
- table.getTaskContextSupplier(), new
CreateHandleFactory<>()).forEachRemaining(writeStatuses::addAll);
+ new JavaLazyInsertIterable<>(repartitionedRecords.iterator(), true,
Review comment:
nts: re-examine the file id generation
##########
File path:
hudi-common/src/main/java/org/apache/hudi/fileid/KafkaConnectFileIdPrefixProvider.java
##########
@@ -0,0 +1,69 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.fileid;
+
+import org.apache.hudi.exception.HoodieException;
+
+import org.apache.log4j.LogManager;
+import org.apache.log4j.Logger;
+
+import javax.xml.bind.DatatypeConverter;
Review comment:
is this platform portable?
##########
File path:
hudi-client/hudi-java-client/src/main/java/org/apache/hudi/client/HoodieJavaWriteClient.java
##########
@@ -153,11 +155,24 @@ public boolean commit(String instantTime,
throw new HoodieNotSupportedException("BulkInsert is not supported in
HoodieJavaClient");
}
+ public void preBulkWrite(String instantTime) {
Review comment:
rename: preBulkInsert
##########
File path:
hudi-common/src/main/java/org/apache/hudi/common/util/CommitUtils.java
##########
@@ -134,4 +136,24 @@ private static HoodieCommitMetadata
buildMetadataFromStats(List<HoodieWriteStat>
}
return fileIdToPath;
}
+
+ /**
+ * Get the Metadata from the latest commit file.
+ * @param metaClient The {@link HoodieTableMetaClient} to get access to the
meta data.
+ * @return An Optional {@link HoodieCommitMetadata} containing the meta data
from the latest commit file.
+ */
+ public static Option<HoodieCommitMetadata>
getCommitMetadataForLatestInstant(HoodieTableMetaClient metaClient) {
+ HoodieTimeline timeline =
metaClient.getActiveTimeline().getCommitsTimeline().filterCompletedInstants();
Review comment:
what if there are other completed instants on the timeline, like a
compaction or clustering? They'll not have the commit offsets.
##########
File path:
hudi-common/src/main/java/org/apache/hudi/fileid/FileIdPrefixProvider.java
##########
@@ -0,0 +1,36 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements. See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership. The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License. You may obtain a copy of the License at
+ *
+ * http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.hudi.fileid;
Review comment:
nts: rename package name?
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]
> [Kafka Connect Milestone 1] Implement kafka connect for immutable data
> ----------------------------------------------------------------------
>
> Key: HUDI-2394
> URL: https://issues.apache.org/jira/browse/HUDI-2394
> Project: Apache Hudi
> Issue Type: Sub-task
> Reporter: Rajesh Mahindra
> Priority: Major
> Labels: pull-request-available
>
> Implement kafka connect for immutable data using Bulk inserts
--
This message was sent by Atlassian Jira
(v8.3.4#803005)