cshuo commented on code in PR #17751:
URL: https://github.com/apache/hudi/pull/17751#discussion_r2654951015
##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchema.java:
##########
@@ -478,6 +485,120 @@ public static HoodieSchema createUUID() {
return new HoodieSchema(uuidSchema);
}
+ /**
+ * Creates an unshredded Variant schema.
+ * Unshredded variants have both metadata and value fields as required
(non-nullable) binary fields.
+ *
+ * @return a new HoodieSchema.Variant representing an unshredded variant
+ */
+ public static HoodieSchema.Variant createVariant() {
+ return createVariant(null, null, null);
+ }
+
+ /**
+ * Creates an unshredded Variant schema with the specified name and
namespace.
+ *
+ * @param name the variant record name (can be null, defaults to
"variant")
+ * @param namespace the namespace (can be null)
+ * @param doc the documentation (can be null)
+ * @return a new HoodieSchema.Variant representing an unshredded variant
+ */
+ public static HoodieSchema.Variant createVariant(String name, String
namespace, String doc) {
+ String variantName = (name != null && !name.isEmpty()) ? name :
VariantLogicalType.VARIANT_LOGICAL_TYPE_NAME;
+
+ // Create metadata field (required bytes)
+ HoodieSchemaField metadataField = HoodieSchemaField.of(
+ Variant.VARIANT_METADATA_FIELD,
+ HoodieSchema.create(HoodieSchemaType.BYTES),
+ "Variant metadata component",
+ null
+ );
+
+ // Create value field (required bytes)
+ HoodieSchemaField valueField = HoodieSchemaField.of(
+ Variant.VARIANT_VALUE_FIELD,
+ HoodieSchema.create(HoodieSchemaType.BYTES),
+ "Variant value component",
+ null
+ );
+
+ List<HoodieSchemaField> fields = Arrays.asList(metadataField, valueField);
+
+ Schema recordSchema = Schema.createRecord(variantName, doc, namespace,
false);
+ List<Schema.Field> avroFields = fields.stream()
+ .map(HoodieSchemaField::getAvroField)
+ .collect(Collectors.toList());
+ recordSchema.setFields(avroFields);
+
+ // Add Variant logical type
+ VariantLogicalType.variant().addToSchema(recordSchema);
+
+ return new HoodieSchema.Variant(recordSchema);
+ }
+
+ /**
+ * Creates a shredded Variant schema with an optional typed_value field.
+ * Shredded variants have metadata (required), value (optional/nullable),
and typed_value (optional) fields.
+ *
+ * @param typedValueSchema the schema for the typed_value field (can be null
if typed_value is not needed)
+ * @return a new HoodieSchema.Variant representing a shredded variant
+ */
+ public static HoodieSchema.Variant createVariantShredded(HoodieSchema
typedValueSchema) {
Review Comment:
Do we need to include shredding information in the type/schema layer? IIUC, it's
more of a read/write optimization mechanism, which can be inferred or fetched
from configuration during reading or writing.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]