Copilot commented on code in PR #17153:
URL: https://github.com/apache/pinot/pull/17153#discussion_r2496329652


##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/array/BaseArrayAggMvIntFunction.java:
##########
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function.array;
+
+import it.unimi.dsi.fastutil.ints.IntArrayList;
+import it.unimi.dsi.fastutil.ints.IntCollection;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.data.FieldSpec;
+
+
+public abstract class BaseArrayAggMvIntFunction<I extends IntCollection>
+    extends BaseArrayAggFunction<I, IntArrayList> {
+  public BaseArrayAggMvIntFunction(ExpressionContext expression, 
FieldSpec.DataType dataType,
+      boolean nullHandlingEnabled) {
+    super(expression, dataType, nullHandlingEnabled);
+  }
+
+  abstract void setGroupByResult(GroupByResultHolder groupByResultHolder, int 
groupKey, int value);
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.ARRAYAGGMV;
+  }
+
+  @Override
+  public void aggregateGroupBySV(int length, int[] groupKeyArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    int[][] valuesArray = blockValSet.getIntValuesMV();
+
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        int groupKey = groupKeyArray[i];
+        int[] values = valuesArray[i];
+        for (int value : values) {
+          setGroupByResult(groupByResultHolder, groupKey, value);
+        }
+      }
+    });
+  }
+
+  @Override
+  public void aggregateGroupByMV(int length, int[][] groupKeysArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    int[][] valuesArray = blockValSet.getIntValuesMV();
+
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        int[] groupKeys = groupKeysArray[i];
+        int[] values = valuesArray[i];
+        for (int groupKey : groupKeys) {
+          for (int value : values) {
+            setGroupByResult(groupByResultHolder, groupKey, value);
+          }
+        }
+      }
+    });
+  }
+
+  @Override
+  public I merge(I intermediateResult1, I intermediateResult2) {
+    if (intermediateResult1 == null || intermediateResult1.isEmpty()) {
+      return intermediateResult2;
+    }
+    if (intermediateResult2 == null || intermediateResult2.isEmpty()) {
+      return intermediateResult1;
+    }

Review Comment:
   The merge logic inconsistently checks isEmpty() on IntCollection in 
BaseArrayAggMvIntFunction and BaseArrayAggMvLongFunction but not in 
BaseArrayAggMvFloatFunction, BaseArrayAggMvDoubleFunction, and 
BaseArrayAggMvStringFunction. For consistency and to avoid merging empty 
collections, add isEmpty() checks in all merge methods.



##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/array/ArrayAggMvDistinctStringFunction.java:
##########
@@ -0,0 +1,100 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function.array;
+
+import it.unimi.dsi.fastutil.objects.ObjectArrayList;
+import it.unimi.dsi.fastutil.objects.ObjectIterators;
+import it.unimi.dsi.fastutil.objects.ObjectOpenHashSet;
+import it.unimi.dsi.fastutil.objects.ObjectSet;
+import java.util.Map;
+import org.apache.pinot.common.CustomObject;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.common.ObjectSerDeUtils;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+
+
+public class ArrayAggMvDistinctStringFunction extends 
BaseArrayAggMvStringFunction<ObjectSet<String>> {
+  public ArrayAggMvDistinctStringFunction(ExpressionContext expression, 
boolean nullHandlingEnabled) {
+    super(expression, nullHandlingEnabled);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    String[][] valuesArray = blockValSet.getStringValuesMV();
+    ObjectSet<String> existing = aggregationResultHolder.getResult();
+    ObjectOpenHashSet<String> valueSet =
+        existing != null ? new ObjectOpenHashSet<>(existing) : new 
ObjectOpenHashSet<>(length);

Review Comment:
   Creating a new ObjectOpenHashSet from an existing set on every aggregate 
call is inefficient. Consider reusing the existing set directly: `valueSet = 
existing != null ? (ObjectOpenHashSet<String>) existing : new 
ObjectOpenHashSet<>(length);` and then calling 
`aggregationResultHolder.setValue(valueSet)` only when existing was null. This 
avoids unnecessary allocations when merging multiple blocks.



##########
pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/ArrayAggMvFunctionTest.java:
##########
@@ -0,0 +1,148 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import it.unimi.dsi.fastutil.doubles.DoubleArrayList;
+import it.unimi.dsi.fastutil.doubles.DoubleOpenHashSet;
+import it.unimi.dsi.fastutil.longs.LongArrayList;
+import it.unimi.dsi.fastutil.longs.LongOpenHashSet;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.core.common.ObjectSerDeUtils;
+import org.apache.pinot.core.common.SyntheticBlockValSets;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import 
org.apache.pinot.core.query.aggregation.function.array.ArrayAggMvDistinctDoubleFunction;
+import 
org.apache.pinot.core.query.aggregation.function.array.ArrayAggMvDistinctLongFunction;
+import 
org.apache.pinot.core.query.aggregation.function.array.ArrayAggMvDoubleFunction;
+import 
org.apache.pinot.core.query.aggregation.function.array.ArrayAggMvLongFunction;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+
+
+public class ArrayAggMvFunctionTest extends AbstractAggregationFunctionTest {
+
+  private static class TestDoubleMVBlock extends SyntheticBlockValSets.Base {
+    private final double[][] _values;
+
+    TestDoubleMVBlock(double[][] values) {
+      _values = values;
+    }
+
+    @Override
+    public boolean isSingleValue() {
+      return false;
+    }
+
+    @Override
+    public double[][] getDoubleValuesMV() {
+      return _values;
+    }
+
+    @Override
+    public FieldSpec.DataType getValueType() {
+      return FieldSpec.DataType.DOUBLE;
+    }
+  }
+
+  private static class TestLongMVBlock extends SyntheticBlockValSets.Base {
+    private final long[][] _values;
+
+    TestLongMVBlock(long[][] values) {
+      _values = values;
+    }
+
+    @Override
+    public boolean isSingleValue() {
+      return false;
+    }
+
+    @Override
+    public long[][] getLongValuesMV() {
+      return _values;
+    }
+
+    @Override
+    public FieldSpec.DataType getValueType() {
+      return FieldSpec.DataType.LONG;
+    }
+  }
+
+  @Test
+  public void testDoubleArrayAggMvMultipleBlocks() {
+    ArrayAggMvDistinctDoubleFunction distinctFn =
+        new 
ArrayAggMvDistinctDoubleFunction(ExpressionContext.forIdentifier("myField"), 
false);
+    AggregationResultHolder holder = 
distinctFn.createAggregationResultHolder();
+
+    distinctFn.aggregate(2, holder,
+        Map.of(ExpressionContext.forIdentifier("myField"), new 
TestDoubleMVBlock(new double[][]{{1.0, 2.0}, {2.0}})));
+    distinctFn.aggregate(2, holder,
+        Map.of(ExpressionContext.forIdentifier("myField"), new 
TestDoubleMVBlock(new double[][]{{2.0, 3.0}, {3.0}})));
+    DoubleOpenHashSet distinct = holder.getResult();
+    assertEquals(distinct.size(), 3);
+
+    ArrayAggMvDoubleFunction fn = new 
ArrayAggMvDoubleFunction(ExpressionContext.forIdentifier("myField"), false);
+    holder = fn.createAggregationResultHolder();
+    fn.aggregate(2, holder,
+        Map.of(ExpressionContext.forIdentifier("myField"), new 
TestDoubleMVBlock(new double[][]{{1.0, 2.0}, {2.0}})));
+    fn.aggregate(2, holder,
+        Map.of(ExpressionContext.forIdentifier("myField"), new 
TestDoubleMVBlock(new double[][]{{2.0, 3.0}, {3.0}})));
+    DoubleArrayList result = holder.getResult();
+    assertEquals(result.size(), 6);
+
+    // round-trip ser/de
+    AggregationFunction.SerializedIntermediateResult ser = 
fn.serializeIntermediateResult(result);
+    DoubleArrayList deser = ObjectSerDeUtils.deserialize(ser.getBytes(), 
ObjectSerDeUtils.ObjectType.DoubleArrayList);

Review Comment:
   The test directly uses `ObjectSerDeUtils.deserialize()` instead of calling 
the function's `deserializeIntermediateResult()` method. For consistency and to 
ensure the function's deserialization logic is tested, use 
`fn.deserializeIntermediateResult(new CustomObject(ser.getBytes()))` or a 
similar approach that exercises the function's deserialization method.
   ```suggestion
       DoubleArrayList deser = fn.deserializeIntermediateResult(ser);
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to