voonhous commented on code in PR #17526:
URL: https://github.com/apache/hudi/pull/17526#discussion_r2605236606


##########
hudi-common/src/main/java/org/apache/hudi/common/schema/HoodieSchemaUtils.java:
##########
@@ -386,4 +386,154 @@ private static Option<Pair<String, HoodieSchemaField>> 
getNestedFieldInternal(Ho
           ));
     }
   }
+
+  /**
+   * Generates a projection schema from the original schema, including only 
the specified fields.
+   * This is equivalent to HoodieAvroUtils.generateProjectionSchema() but 
operates on HoodieSchema.
+   *
+   * @param originalSchema the source schema
+   * @param fieldNames     the list of field names to include in the projection
+   * @return new HoodieSchema containing only the specified fields
+   * @throws IllegalArgumentException if schema is null or not a record type
+   * @since 1.2.0
+   */
+  public static HoodieSchema generateProjectionSchema(HoodieSchema 
originalSchema, List<String> fieldNames) {
+    ValidationUtils.checkArgument(originalSchema != null, "Original schema 
cannot be null");
+    ValidationUtils.checkArgument(fieldNames != null, "Field names cannot be 
null");
+
+    // Delegate to HoodieAvroUtils
+    Schema projectedAvro = 
HoodieAvroUtils.generateProjectionSchema(originalSchema.toAvroSchema(), 
fieldNames);
+    return HoodieSchema.fromAvroSchema(projectedAvro);
+  }
+
+  /**
+   * Prunes the data schema to only include fields that are required by the 
required schema,
+   * plus any mandatory fields specified.
+   * This is equivalent to {@link AvroSchemaUtils#pruneDataSchema(Schema, 
Schema, Set)} but operates on HoodieSchema.
+   *
+   * @param dataSchema      the full data schema
+   * @param requiredSchema  the schema containing required fields
+   * @param mandatoryFields set of field names that must be included regardless
+   * @return new HoodieSchema with pruned fields
+   * @throws IllegalArgumentException if either schema is null
+   * @since 1.2.0
+   */
+  public static HoodieSchema pruneDataSchema(HoodieSchema dataSchema, 
HoodieSchema requiredSchema, Set<String> mandatoryFields) {
+    ValidationUtils.checkArgument(dataSchema != null, "Data schema cannot be 
null");
+    ValidationUtils.checkArgument(requiredSchema != null, "Required schema 
cannot be null");
+
+    Set<String> mandatorySet = mandatoryFields != null ? mandatoryFields : 
Collections.emptySet();
+
+    // Delegate to AvroSchemaUtils
+    Schema prunedAvro = AvroSchemaUtils.pruneDataSchema(
+        dataSchema.toAvroSchema(),
+        requiredSchema.toAvroSchema(),
+        mandatorySet);
+    return HoodieSchema.fromAvroSchema(prunedAvro);
+  }
+
+  /**
+   * Checks if two schemas are projection equivalent (i.e., they have the same 
fields and types
+   * for projection purposes, ignoring certain metadata differences).
+   * This is equivalent to {@link 
AvroSchemaUtils#areSchemasProjectionEquivalent(Schema, Schema)} but operates on 
HoodieSchema.
+   *
+   * @param schema1 the first schema
+   * @param schema2 the second schema
+   * @return true if schemas are projection equivalent
+   * @throws IllegalArgumentException if either schema is null
+   * @since 1.2.0
+   */
+  public static boolean areSchemasProjectionEquivalent(HoodieSchema schema1, 
HoodieSchema schema2) {
+    // Delegate to AvroSchemaUtils
+    return AvroSchemaUtils.areSchemasProjectionEquivalent(schema1 == null ? 
null : schema1.toAvroSchema(), schema2 == null ? null : schema2.toAvroSchema());
+  }
+
+  /**
+   * Adds newFields to the schema. Will add nested fields without duplicating 
the field.
+   * For example, if your schema is "a.b.{c,e}" and newFields contains 
"a.{b.{d,e},x.y}",
+   * it will stitch them together to be "a.{b.{c,d,e},x.y}".
+   * This is equivalent to {@link 
AvroSchemaUtils#appendFieldsToSchemaDedupNested(Schema, List)} but operates on 
HoodieSchema.
+   *
+   * @param schema    the original schema
+   * @param newFields list of new fields to add
+   * @return the updated schema with new fields added
+   */
+  public static HoodieSchema appendFieldsToSchemaDedupNested(HoodieSchema 
schema, List<HoodieSchemaField> newFields) {
+    return 
HoodieSchema.fromAvroSchema(AvroSchemaUtils.appendFieldsToSchemaDedupNested(schema.toAvroSchema(),
+        
newFields.stream().map(HoodieSchemaField::getAvroField).collect(Collectors.toList())));
+  }
+
+  /**
+   * Create a new schema but maintain all meta info from the old schema.
+   * This is equivalent to {@link 
AvroSchemaUtils#createNewSchemaFromFieldsWithReference(Schema, List)} but 
operates on HoodieSchema.
+   *
+   * @param schema schema to get the meta info from
+   * @param fields list of fields in order that will be in the new schema
+   *
+   * @return schema with fields from fields, and metadata from schema
+   */
+  public static HoodieSchema 
createNewSchemaFromFieldsWithReference(HoodieSchema schema, 
List<HoodieSchemaField> fields) {
+    if (schema == null) {
+      throw new IllegalArgumentException("Schema must not be null");
+    }
+    return 
HoodieSchema.fromAvroSchema(AvroSchemaUtils.createNewSchemaFromFieldsWithReference(
+        schema.toAvroSchema(),
+        
fields.stream().map(HoodieSchemaField::getAvroField).collect(Collectors.toList())
+    ));
+  }
+
+  /**
+   * Gets a field from a record; works on nested fields as well (if you 
provide the whole name, e.g. toplevel.nextlevel.child)

Review Comment:
   Addressed this in: #17546



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]

Reply via email to