Copilot commented on code in PR #17153: URL: https://github.com/apache/pinot/pull/17153#discussion_r2496329652
########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/array/BaseArrayAggMvIntFunction.java: ########## @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function.array; + +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntCollection; +import java.util.Map; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.segment.spi.AggregationFunctionType; +import org.apache.pinot.spi.data.FieldSpec; + + +public abstract class BaseArrayAggMvIntFunction<I extends IntCollection> + extends BaseArrayAggFunction<I, IntArrayList> { + public BaseArrayAggMvIntFunction(ExpressionContext expression, FieldSpec.DataType dataType, + boolean nullHandlingEnabled) { + super(expression, dataType, nullHandlingEnabled); + } + + abstract void setGroupByResult(GroupByResultHolder groupByResultHolder, int groupKey, int value); + + @Override + public AggregationFunctionType getType() { + return AggregationFunctionType.ARRAYAGGMV; + } + + @Override + public void aggregateGroupBySV(int length, int[] groupKeyArray, GroupByResultHolder groupByResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + int[][] valuesArray = blockValSet.getIntValuesMV(); + + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + int groupKey = groupKeyArray[i]; + int[] values = valuesArray[i]; + for (int value : values) { + setGroupByResult(groupByResultHolder, groupKey, value); + } + } + }); + } + + @Override + public void aggregateGroupByMV(int length, int[][] groupKeysArray, GroupByResultHolder groupByResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + int[][] valuesArray = blockValSet.getIntValuesMV(); + + forEachNotNull(length, blockValSet, (from, to) -> { + for (int i = from; i < to; i++) { + int[] groupKeys = groupKeysArray[i]; + int[] values = valuesArray[i]; + for (int groupKey : groupKeys) { + for (int value : values) { + setGroupByResult(groupByResultHolder, groupKey, value); + } + } + } + }); + } + + @Override + public I merge(I intermediateResult1, I intermediateResult2) { + if (intermediateResult1 == null || intermediateResult1.isEmpty()) { + return intermediateResult2; + } + if (intermediateResult2 == null || intermediateResult2.isEmpty()) { + return intermediateResult1; + } Review Comment: The merge logic inconsistently checks isEmpty() on IntCollection in BaseArrayAggMvIntFunction and BaseArrayAggMvLongFunction but not in BaseArrayAggMvFloatFunction, BaseArrayAggMvDoubleFunction, and BaseArrayAggMvStringFunction. For consistency and to avoid merging empty collections, add isEmpty() checks in all merge methods. ########## pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/array/ArrayAggMvDistinctStringFunction.java: ########## @@ -0,0 +1,100 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function.array; + +import it.unimi.dsi.fastutil.objects.ObjectArrayList; +import it.unimi.dsi.fastutil.objects.ObjectIterators; +import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet; +import it.unimi.dsi.fastutil.objects.ObjectSet; +import java.util.Map; +import org.apache.pinot.common.CustomObject; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.core.common.BlockValSet; +import org.apache.pinot.core.common.ObjectSerDeUtils; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; + + +public class ArrayAggMvDistinctStringFunction extends BaseArrayAggMvStringFunction<ObjectSet<String>> { + public ArrayAggMvDistinctStringFunction(ExpressionContext expression, boolean nullHandlingEnabled) { + super(expression, nullHandlingEnabled); + } + + @Override + public void aggregate(int length, AggregationResultHolder aggregationResultHolder, + Map<ExpressionContext, BlockValSet> blockValSetMap) { + BlockValSet blockValSet = blockValSetMap.get(_expression); + String[][] valuesArray = blockValSet.getStringValuesMV(); + ObjectSet<String> existing = aggregationResultHolder.getResult(); + ObjectOpenHashSet<String> valueSet = + existing != null ? new ObjectOpenHashSet<>(existing) : new ObjectOpenHashSet<>(length); Review Comment: Creating a new ObjectOpenHashSet from an existing set on every aggregate call is inefficient. Consider reusing the existing set directly: `valueSet = existing != null ? (ObjectOpenHashSet<String>) existing : new ObjectOpenHashSet<>(length);` and then calling `aggregationResultHolder.setValue(valueSet)` only when existing was null. This avoids unnecessary allocations when merging multiple blocks. ########## pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/ArrayAggMvFunctionTest.java: ########## @@ -0,0 +1,148 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.query.aggregation.function; + +import it.unimi.dsi.fastutil.doubles.DoubleArrayList; +import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet; +import it.unimi.dsi.fastutil.longs.LongArrayList; +import it.unimi.dsi.fastutil.longs.LongOpenHashSet; +import java.util.Map; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.core.common.ObjectSerDeUtils; +import org.apache.pinot.core.common.SyntheticBlockValSets; +import org.apache.pinot.core.query.aggregation.AggregationResultHolder; +import org.apache.pinot.core.query.aggregation.function.array.ArrayAggMvDistinctDoubleFunction; +import org.apache.pinot.core.query.aggregation.function.array.ArrayAggMvDistinctLongFunction; +import org.apache.pinot.core.query.aggregation.function.array.ArrayAggMvDoubleFunction; +import org.apache.pinot.core.query.aggregation.function.array.ArrayAggMvLongFunction; +import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder; +import org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder; +import org.apache.pinot.spi.data.FieldSpec; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; + + +public class ArrayAggMvFunctionTest extends AbstractAggregationFunctionTest { + + private static class TestDoubleMVBlock extends SyntheticBlockValSets.Base { + private final double[][] _values; + + TestDoubleMVBlock(double[][] values) { + _values = values; + } + + @Override + public boolean isSingleValue() { + return false; + } + + @Override + public double[][] getDoubleValuesMV() { + return _values; + } + + @Override + public FieldSpec.DataType getValueType() { + return FieldSpec.DataType.DOUBLE; + } + } + + private static class TestLongMVBlock extends SyntheticBlockValSets.Base { + private final long[][] _values; + + TestLongMVBlock(long[][] values) { + _values = values; + } + + @Override + public boolean isSingleValue() { + return false; + } + + @Override + public long[][] getLongValuesMV() { + return _values; + } + + @Override + public FieldSpec.DataType getValueType() { + return FieldSpec.DataType.LONG; + } + } + + @Test + public void testDoubleArrayAggMvMultipleBlocks() { + ArrayAggMvDistinctDoubleFunction distinctFn = + new ArrayAggMvDistinctDoubleFunction(ExpressionContext.forIdentifier("myField"), false); + AggregationResultHolder holder = distinctFn.createAggregationResultHolder(); + + distinctFn.aggregate(2, holder, + Map.of(ExpressionContext.forIdentifier("myField"), new TestDoubleMVBlock(new double[][]{{1.0, 2.0}, {2.0}}))); + distinctFn.aggregate(2, holder, + Map.of(ExpressionContext.forIdentifier("myField"), new TestDoubleMVBlock(new double[][]{{2.0, 3.0}, {3.0}}))); + DoubleOpenHashSet distinct = holder.getResult(); + assertEquals(distinct.size(), 3); + + ArrayAggMvDoubleFunction fn = new ArrayAggMvDoubleFunction(ExpressionContext.forIdentifier("myField"), false); + holder = fn.createAggregationResultHolder(); + fn.aggregate(2, holder, + Map.of(ExpressionContext.forIdentifier("myField"), new TestDoubleMVBlock(new double[][]{{1.0, 2.0}, {2.0}}))); + fn.aggregate(2, holder, + Map.of(ExpressionContext.forIdentifier("myField"), new TestDoubleMVBlock(new double[][]{{2.0, 3.0}, {3.0}}))); + DoubleArrayList result = holder.getResult(); + assertEquals(result.size(), 6); + + // round-trip ser/de + AggregationFunction.SerializedIntermediateResult ser = fn.serializeIntermediateResult(result); + DoubleArrayList deser = ObjectSerDeUtils.deserialize(ser.getBytes(), ObjectSerDeUtils.ObjectType.DoubleArrayList); Review Comment: The test directly uses `ObjectSerDeUtils.deserialize()` instead of calling the function's `deserializeIntermediateResult()` method. For consistency and to ensure the function's deserialization logic is tested, use `fn.deserializeIntermediateResult(new CustomObject(ser.getBytes()))` or a similar approach that exercises the function's deserialization method. ```suggestion DoubleArrayList deser = fn.deserializeIntermediateResult(ser); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
