nsivabalan commented on a change in pull request #1834:
URL: https://github.com/apache/hudi/pull/1834#discussion_r469922118
##########
File path:
hudi-spark/src/main/java/org/apache/hudi/keygen/BuiltinKeyGenerator.java
##########
@@ -85,71 +84,40 @@ public final HoodieKey getKey(GenericRecord record) {
}).collect(Collectors.toList());
}
- @Override
- public void initializeRowKeyGenerator(StructType structType, String
structName, String recordNamespace) {
- // parse simple feilds
- getRecordKeyFields().stream()
- .filter(f -> !(f.contains(".")))
- .forEach(f -> {
- if (structType.getFieldIndex(f).isDefined()) {
- recordKeyPositions.put(f, Collections.singletonList((Integer)
(structType.getFieldIndex(f).get())));
- } else {
- throw new HoodieKeyException("recordKey value not found for field:
\"" + f + "\"");
- }
- });
- // parse nested fields
- getRecordKeyFields().stream()
- .filter(f -> f.contains("."))
- .forEach(f -> recordKeyPositions.put(f,
RowKeyGeneratorHelper.getNestedFieldIndices(structType, f, true)));
- // parse simple fields
- if (getPartitionPathFields() != null) {
- getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f ->
!(f.contains(".")))
+ void buildFieldPositionMapIfNeeded(StructType structType) {
+ if (this.structType == null) {
+ // parse simple fields
+ getRecordKeyFields().stream()
+ .filter(f -> !(f.contains(".")))
.forEach(f -> {
if (structType.getFieldIndex(f).isDefined()) {
- partitionPathPositions.put(f,
- Collections.singletonList((Integer)
(structType.getFieldIndex(f).get())));
+ recordKeyPositions.put(f, Collections.singletonList((Integer)
(structType.getFieldIndex(f).get())));
} else {
- partitionPathPositions.put(f, Collections.singletonList(-1));
+ throw new HoodieKeyException("recordKey value not found for
field: \"" + f + "\"");
}
});
// parse nested fields
- getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f ->
f.contains("."))
- .forEach(f -> partitionPathPositions.put(f,
- RowKeyGeneratorHelper.getNestedFieldIndices(structType, f,
false)));
- }
- this.structName = structName;
- this.structType = structType;
- this.recordNamespace = recordNamespace;
- }
-
- /**
- * Fetch record key from {@link Row}.
- *
- * @param row instance of {@link Row} from which record key is requested.
- * @return the record key of interest from {@link Row}.
- */
- @Override
- public String getRecordKey(Row row) {
- if (null == converterFn) {
- converterFn = AvroConversionHelper.createConverterToAvro(structType,
structName, recordNamespace);
+ getRecordKeyFields().stream()
+ .filter(f -> f.contains("."))
+ .forEach(f -> recordKeyPositions.put(f,
RowKeyGeneratorHelper.getNestedFieldIndices(structType, f, true)));
+ // parse simple fields
+ if (getPartitionPathFields() != null) {
+ getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f
-> !(f.contains(".")))
+ .forEach(f -> {
+ if (structType.getFieldIndex(f).isDefined()) {
+ partitionPathPositions.put(f,
+ Collections.singletonList((Integer)
(structType.getFieldIndex(f).get())));
+ } else {
+ partitionPathPositions.put(f, Collections.singletonList(-1));
+ }
+ });
+ // parse nested fields
+ getPartitionPathFields().stream().filter(f -> !f.isEmpty()).filter(f
-> f.contains("."))
+ .forEach(f -> partitionPathPositions.put(f,
+ RowKeyGeneratorHelper.getNestedFieldIndices(structType, f,
false)));
+ }
+ this.structType = structType;
Review comment:
May I know where `structType` is being used?
----------------------------------------------------------------
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
For queries about this service, please contact Infrastructure at:
[email protected]