the-other-tim-brown commented on code in PR #13213: URL: https://github.com/apache/hudi/pull/13213#discussion_r2056133806
########## hudi-common/src/main/java/org/apache/hudi/common/table/read/BufferedRecord.java: ########## @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.table.read; + +import org.apache.hudi.common.engine.HoodieReaderContext; +import org.apache.hudi.common.model.DeleteRecord; +import org.apache.hudi.common.util.Option; + +import org.apache.avro.Schema; + +import java.io.Serializable; + +import static org.apache.hudi.common.model.HoodieRecord.DEFAULT_ORDERING_VALUE; + +/** + * Buffered Record used by file group reader. + */ +public class BufferedRecord<T> implements Serializable { + private final String recordKey; + private final String partitionPath; Review Comment: Currently when we're using this object in the FileGroupReader, I think we can assume the partition path is always the same. If that is the case we can remove this field to reduce the serialized size ########## hudi-common/src/main/java/org/apache/hudi/common/table/read/BufferedRecord.java: ########## @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. 
See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.table.read; + +import org.apache.hudi.common.engine.HoodieReaderContext; +import org.apache.hudi.common.model.DeleteRecord; +import org.apache.hudi.common.util.Option; + +import org.apache.avro.Schema; + +import java.io.Serializable; + +import static org.apache.hudi.common.model.HoodieRecord.DEFAULT_ORDERING_VALUE; + +/** + * Buffered Record used by file group reader. 
+ */ +public class BufferedRecord<T> implements Serializable { + private final String recordKey; + private final String partitionPath; + private final Comparable orderingValue; + private T record; + private final Integer schemaId; + private final boolean isDelete; + + private BufferedRecord(String partitionPath, String recordKey, Comparable orderingValue, T record, Integer schemaId, boolean isDelete) { + this.partitionPath = partitionPath; + this.recordKey = recordKey; + this.orderingValue = orderingValue; + this.record = record; + this.schemaId = schemaId; + this.isDelete = isDelete; + } + + public static <T> BufferedRecord<T> forRecordWithContext(T record, Schema schema, HoodieReaderContext<T> readerContext, Option<String> orderingFieldName, boolean isDelete) { + String recordKey = readerContext.getRecordKey(record, schema); + Integer schemaId = readerContext.encodeAvroSchema(schema); + Comparable orderingValue = readerContext.getOrderingValue(record, schema, orderingFieldName); + return new BufferedRecord<>(null, recordKey, orderingValue, record, schemaId, isDelete); + } + + public static <T> BufferedRecord<T> forDeleteRecord(DeleteRecord deleteRecord, Comparable orderingValue) { + return new BufferedRecord<>( + deleteRecord.getPartitionPath(), deleteRecord.getRecordKey(), orderingValue, null, null, true); + } + + public String getPartitionPath() { + return partitionPath; + } + + public String getRecordKey() { + return recordKey; + } + + public Comparable getOrderingValue() { + return orderingValue; + } + + public T getRecord() { + return record; + } + + public Integer getSchemaId() { + return schemaId; + } + + public boolean isDelete() { + return isDelete; + } + + public boolean isEmptyRecord() { + return isDelete; + } Review Comment: Do we need both of these methods? 
Also with some payloads like the postgres payload, you will have a record body even when it is a delete ########## hudi-common/src/main/java/org/apache/hudi/common/table/read/BufferedRecord.java: ########## @@ -0,0 +1,105 @@ +/* + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ + +package org.apache.hudi.common.table.read; + +import org.apache.hudi.common.engine.HoodieReaderContext; +import org.apache.hudi.common.model.DeleteRecord; +import org.apache.hudi.common.util.Option; + +import org.apache.avro.Schema; + +import java.io.Serializable; + +import static org.apache.hudi.common.model.HoodieRecord.DEFAULT_ORDERING_VALUE; + +/** + * Buffered Record used by file group reader. 
+ */ +public class BufferedRecord<T> implements Serializable { + private final String recordKey; + private final String partitionPath; + private final Comparable orderingValue; + private T record; + private final Integer schemaId; + private final boolean isDelete; + + private BufferedRecord(String partitionPath, String recordKey, Comparable orderingValue, T record, Integer schemaId, boolean isDelete) { + this.partitionPath = partitionPath; + this.recordKey = recordKey; + this.orderingValue = orderingValue; + this.record = record; + this.schemaId = schemaId; + this.isDelete = isDelete; + } + + public static <T> BufferedRecord<T> forRecordWithContext(T record, Schema schema, HoodieReaderContext<T> readerContext, Option<String> orderingFieldName, boolean isDelete) { + String recordKey = readerContext.getRecordKey(record, schema); + Integer schemaId = readerContext.encodeAvroSchema(schema); + Comparable orderingValue = readerContext.getOrderingValue(record, schema, orderingFieldName); + return new BufferedRecord<>(null, recordKey, orderingValue, record, schemaId, isDelete); + } + + public static <T> BufferedRecord<T> forDeleteRecord(DeleteRecord deleteRecord, Comparable orderingValue) { + return new BufferedRecord<>( + deleteRecord.getPartitionPath(), deleteRecord.getRecordKey(), orderingValue, null, null, true); + } + + public String getPartitionPath() { + return partitionPath; + } + + public String getRecordKey() { + return recordKey; + } + + public Comparable getOrderingValue() { + return orderingValue; + } + + public T getRecord() { + return record; + } + + public Integer getSchemaId() { + return schemaId; + } + + public boolean isDelete() { + return isDelete; + } + + public boolean isEmptyRecord() { + return isDelete; + } + + public boolean isDeleteRecordWithNaturalOrder() { Review Comment: `naturalOrdering` is a confusing concept to me. usually natural ordering would be something like 1,2,3 but in this case 0 is greater than 1, 2, and 3. 
I think a different name would be helpful here, such as `hardDelete` or `forcedDeletion`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]
