Copilot commented on code in PR #2255:
URL: https://github.com/apache/fluss/pull/2255#discussion_r2648272452
##########
fluss-server/src/main/java/org/apache/fluss/server/kv/rowmerger/AggregateRowMerger.java:
##########
@@ -0,0 +1,414 @@
+/*
+ * Licensed to the Apache Software Foundation (ASF) under one or more
+ * contributor license agreements. See the NOTICE file distributed with
+ * this work for additional information regarding copyright ownership.
+ * The ASF licenses this file to You under the Apache License, Version 2.0
+ * (the "License"); you may not use this file except in compliance with
+ * the License. You may obtain a copy of the License at
+ *
+ *     http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing, software
+ * distributed under the License is distributed on an "AS IS" BASIS,
+ * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied.
+ * See the License for the specific language governing permissions and
+ * limitations under the License.
+ */
+
+package org.apache.fluss.server.kv.rowmerger;
+
+import org.apache.fluss.config.TableConfig;
+import org.apache.fluss.metadata.DeleteBehavior;
+import org.apache.fluss.metadata.KvFormat;
+import org.apache.fluss.metadata.Schema;
+import org.apache.fluss.metadata.SchemaGetter;
+import org.apache.fluss.record.BinaryValue;
+import org.apache.fluss.row.BinaryRow;
+import org.apache.fluss.row.encode.RowEncoder;
+import org.apache.fluss.server.kv.rowmerger.aggregate.AggregateFieldsProcessor;
+import org.apache.fluss.server.kv.rowmerger.aggregate.AggregationContext;
+import org.apache.fluss.server.kv.rowmerger.aggregate.AggregationContextCache;
+
+import com.github.benmanes.caffeine.cache.Cache;
+import com.github.benmanes.caffeine.cache.Caffeine;
+
+import javax.annotation.Nullable;
+
+import java.time.Duration;
+import java.util.BitSet;
+import java.util.HashSet;
+import java.util.List;
+import java.util.Set;
+
+/**
+ * A row merger that aggregates rows with the same primary key using field-level aggregate
+ * functions.
+ *
+ * <p>Each field can have its own aggregate function (e.g., sum, max, min, etc.). This allows for
+ * flexible aggregation semantics at the field level.
+ *
+ * <p>This merger supports schema evolution by dynamically retrieving schemas based on schema IDs
+ * when merging rows with different schema versions.
+ *
+ * <p>This class is thread-safe as it is guaranteed to be accessed by a single thread at a time
+ * (protected by KvTablet's write lock).
+ */
+public class AggregateRowMerger implements RowMerger {
+
+    // Cache configuration constants
+    private static final int PARTIAL_MERGER_CACHE_MAX_SIZE = 4;
+    private static final Duration PARTIAL_MERGER_CACHE_EXPIRE_DURATION = Duration.ofMinutes(5);
+
+    private final SchemaGetter schemaGetter;
+    private final DeleteBehavior deleteBehavior;
+    private final boolean removeRecordOnDelete;
+    private final AggregationContextCache contextCache;
+
+    // Cache for PartialAggregateRowMerger instances to avoid repeated creation
+    private final Cache<CacheKey, PartialAggregateRowMerger> partialMergerCache;
+
+    public AggregateRowMerger(
+            TableConfig tableConfig, KvFormat kvFormat, SchemaGetter schemaGetter) {
+        this.schemaGetter = schemaGetter;
+        // Extract configuration from TableConfig to ensure single source of truth
+        this.removeRecordOnDelete = tableConfig.getAggregationRemoveRecordOnDelete();
+        this.deleteBehavior = tableConfig.getDeleteBehavior().orElse(DeleteBehavior.IGNORE);
+        this.contextCache = new AggregationContextCache(schemaGetter, tableConfig, kvFormat);
+        // Initialize cache with same settings as PartialUpdaterCache and AggregationContextCache
+        this.partialMergerCache =
+                Caffeine.newBuilder()
+                        .maximumSize(PARTIAL_MERGER_CACHE_MAX_SIZE)
+                        .expireAfterAccess(PARTIAL_MERGER_CACHE_EXPIRE_DURATION)
+                        .build();
+    }
+
+    @Override
+    public BinaryValue merge(BinaryValue oldValue, BinaryValue newValue) {
+        // First write: no existing row
+        if (oldValue == null || oldValue.row == null) {
+            return newValue;
+        }
+
+        // Get contexts for schema evolution support
+        // Optimize: reuse context when schemaId is the same, and avoid Tuple2 allocation
+        AggregationContext oldContext;
+        AggregationContext newContext;
+
+        if (oldValue.schemaId == newValue.schemaId) {
+            // Same schema: reuse the same context to avoid duplicate lookup
+            newContext = contextCache.getContext(newValue.schemaId);
+            oldContext = newContext;
+        } else {
+            // Different schemas: need separate lookups
+            oldContext = contextCache.getContext(oldValue.schemaId);
+            newContext = contextCache.getContext(newValue.schemaId);
+        }
+
+        // Use row encoder for aggregation
+        RowEncoder encoder = newContext.getRowEncoder();
+        encoder.startNewRow();
+        AggregateFieldsProcessor.aggregateAllFields(
+                oldValue.row, newValue.row, oldContext, newContext, encoder);
+        BinaryRow mergedRow = encoder.finishRow();
+
+        return new BinaryValue(newValue.schemaId, mergedRow);
+    }
+
+    @Override
+    public BinaryValue delete(BinaryValue oldValue) {
+        if (removeRecordOnDelete) {
+            // Remove the entire row
+            return null;
+        }
+        throw new UnsupportedOperationException(
+                "DELETE is not supported for aggregate merge engine. "
" + + "Configure 'table.aggregation.remove-record-on-delete' to true if needed."); + } + + @Override + public DeleteBehavior deleteBehavior() { + return deleteBehavior; + } + + @Override + public RowMerger configureTargetColumns( + @Nullable int[] targetColumns, short schemaId, Schema schema) { + if (targetColumns == null) { + return this; + } + + // Use cache to get or create PartialAggregateRowMerger + // This avoids repeated object creation and BitSet construction + CacheKey cacheKey = new CacheKey(schemaId, targetColumns); + return partialMergerCache.get( + cacheKey, + k -> { + // Convert target column indices to column IDs + List<Schema.Column> columns = schema.getColumns(); + Set<Integer> targetColumnIds = new HashSet<>(); + for (int colIdx : targetColumns) { + targetColumnIds.add(columns.get(colIdx).getColumnId()); + } + + // Build BitSet for fast target column lookup + BitSet targetColumnIdBitSet = new BitSet(); + for (Integer columnId : targetColumnIds) { + targetColumnIdBitSet.set(columnId); + } + + // Create the PartialAggregateRowMerger instance + return new PartialAggregateRowMerger( + targetColumnIdBitSet, + removeRecordOnDelete, + deleteBehavior, + schemaGetter, + contextCache, + schema, + schemaId); + }); + } + + /** + * Cache key for PartialAggregateRowMerger instances. + * + * <p>Efficiently encodes schema ID and target column indices for cache lookup. Uses array + * content-based equality and hashCode for correct cache behavior. + */ + private static class CacheKey { Review Comment: Class CacheKey overrides [hashCode](1) but not equals. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected]

-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use
the URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]