yihua commented on code in PR #5470:
URL: https://github.com/apache/hudi/pull/5470#discussion_r925003741
##########
hudi-client/hudi-spark-client/src/main/java/org/apache/hudi/client/model/HoodieInternalRow.java:
##########
@@ -24,31 +24,66 @@
import org.apache.spark.sql.catalyst.util.MapData;
import org.apache.spark.sql.types.DataType;
import org.apache.spark.sql.types.Decimal;
+import org.apache.spark.sql.types.StringType$;
import org.apache.spark.unsafe.types.CalendarInterval;
import org.apache.spark.unsafe.types.UTF8String;
+import java.util.Arrays;
+
/**
- * Internal Row implementation for Hoodie Row. It wraps an {@link InternalRow}
and keeps meta columns locally. But the {@link InternalRow}
- * does include the meta columns as well just that {@link HoodieInternalRow}
will intercept queries for meta columns and serve from its
- * copy rather than fetching from {@link InternalRow}.
+ * Hudi internal implementation of {@link InternalRow} that extends an arbitrary
+ * {@link InternalRow} by overlaying Hudi-internal meta-fields on top of it.
+ *
+ * Capable of overlaying meta-fields in both cases: whether or not the original
+ * {@link #row} contains meta columns. This makes it possible to handle the
+ * following use-cases without any manipulation (reshuffling) of the source row,
+ * by simply creating a new instance of {@link HoodieInternalRow} with all the
+ * meta-values provided:
+ *
+ * <ul>
+ * <li>When meta-fields need to be prepended to the source {@link
InternalRow}</li>
+ * <li>When meta-fields need to be updated within the source {@link
InternalRow}
+ * ({@link org.apache.spark.sql.catalyst.expressions.UnsafeRow} currently
does not
+ * allow in-place updates due to its memory layout)</li>
+ * </ul>
*/
public class HoodieInternalRow extends InternalRow {
- private String commitTime;
- private String commitSeqNumber;
- private String recordKey;
- private String partitionPath;
- private String fileName;
- private InternalRow row;
-
- public HoodieInternalRow(String commitTime, String commitSeqNumber, String
recordKey, String partitionPath,
- String fileName, InternalRow row) {
- this.commitTime = commitTime;
- this.commitSeqNumber = commitSeqNumber;
- this.recordKey = recordKey;
- this.partitionPath = partitionPath;
- this.fileName = fileName;
+ /**
+ * Collection of meta-fields as defined by {@link
HoodieRecord#HOODIE_META_COLUMNS}
+ */
+ private final UTF8String[] metaFields;
+ private final InternalRow row;
+
+ /**
+ * Specifies whether source {@link #row} contains meta-fields
+ */
+ private final boolean containsMetaFields;
+
+ public HoodieInternalRow(UTF8String commitTime,
+ UTF8String commitSeqNumber,
+ UTF8String recordKey,
+ UTF8String partitionPath,
+ UTF8String fileName,
+ InternalRow row,
+ boolean containsMetaFields) {
+ this.metaFields = new UTF8String[] {
+ commitTime,
+ commitSeqNumber,
+ recordKey,
+ partitionPath,
+ fileName
+ };
+
this.row = row;
+ this.containsMetaFields = containsMetaFields;
Review Comment:
Sounds good.
--
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.
To unsubscribe, e-mail: [email protected]
For queries about this service, please contact Infrastructure at:
[email protected]