voonhous commented on code in PR #18065:
URL: https://github.com/apache/hudi/pull/18065#discussion_r3426761424
##########
hudi-hadoop-common/src/main/java/org/apache/hudi/io/storage/hadoop/HoodieAvroFileWriterFactory.java:
##########
@@ -140,9 +142,42 @@ private HoodieAvroWriteSupport
getHoodieAvroWriteSupport(HoodieSchema schema,
StorageConfiguration storageConf,
boolean
enableBloomFilter) {
Option<BloomFilter> filter = enableBloomFilter ?
Option.of(createBloomFilter(config)) : Option.empty();
+ HoodieSchema effectiveSchema =
HoodieAvroWriteSupport.generateEffectiveSchema(schema, config);
+ // Work on a copy so we never mutate the shared config's internal
Properties.
+ Properties props = TypedProperties.copy(config.getProps());
+ // Auto-detect variant shredding provider from classpath if not explicitly
configured
+ if (!props.containsKey(PARQUET_VARIANT_SHREDDING_PROVIDER_CLASS.key())) {
+ String detected = detectShreddingProvider();
+ if (detected != null) {
+ props.setProperty(PARQUET_VARIANT_SHREDDING_PROVIDER_CLASS.key(),
detected);
+ }
+ }
return (HoodieAvroWriteSupport) ReflectionUtils.loadClass(
config.getStringOrDefault(HoodieStorageConfig.HOODIE_AVRO_WRITE_SUPPORT_CLASS),
new Class<?>[] {MessageType.class, HoodieSchema.class, Option.class,
Properties.class},
- getAvroSchemaConverter((Configuration)
storageConf.unwrapAs(Configuration.class)).convert(schema), schema, filter,
config.getProps());
+ // Build the Parquet schema from the effective (possibly shredded)
schema so the message type
+ // matches the records actually written - a shredded variant has a
nullable value and a
+ // typed_value column; converting the original schema would mark value
REQUIRED and drop
+ // typed_value, failing the write with "Null-value for required field:
value".
+ getAvroSchemaConverter((Configuration)
storageConf.unwrapAs(Configuration.class)).convert(effectiveSchema), schema,
filter, props);
+ }
+
+ /**
+ * Auto-detect a {@link org.apache.hudi.avro.VariantShreddingProvider}
implementation
+ * available on the classpath. Returns the fully-qualified class name if
found, or null.
+ */
+ private static String detectShreddingProvider() {
+ String[] candidates = {
Review Comment:
Renamed this method from detectShreddingProvider to detectShreddingProviderClass
to reflect that it returns a fully-qualified class name, not a provider instance.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]