clintropolis commented on code in PR #15434:
URL: https://github.com/apache/druid/pull/15434#discussion_r1406899167


##########
processing/src/main/java/org/apache/druid/query/aggregation/any/StringAnyVectorAggregator.java:
##########
@@ -78,7 +84,23 @@ public void aggregate(ByteBuffer buf, int position, int 
startRow, int endRow)
         if (startRow < rows.length) {
           IndexedInts row = rows[startRow];
           @Nullable
-          String foundValue = row.size() == 0 ? null : 
multiValueSelector.lookupName(row.get(0));
+          String foundValue;
+          if (row.size() == 0) {
+            foundValue = null;
+          } else if (aggregateMultipleValues) {
+            if (row.size() > 1) {
+              List<String> arrayList = new ArrayList<>();
+              row.forEach(rowIndex -> {
+                String chopped = 
StringUtils.fastLooseChop(multiValueSelector.lookupName(rowIndex), 
maxStringBytes);

Review Comment:
   i don't think you need to call `fastLooseChop` here because `putValue` uses 
`StringUtils.toUtf8WithLimit` which achieves the same thing



##########
processing/src/test/java/org/apache/druid/query/aggregation/any/StringAnyAggregatorFactoryTest.java:
##########
@@ -72,43 +68,107 @@ public void 
canVectorizeWithoutCapabilitiesShouldReturnTrue()
   @Test
   public void 
factorizeVectorWithoutCapabilitiesShouldReturnAggregatorWithMultiDimensionSelector()
   {
-    
Mockito.doReturn(null).when(vectorSelectorFactory).getColumnCapabilities(FIELD_NAME);
-    Mockito.doReturn(singleValueDimensionVectorSelector)
-           .when(vectorSelectorFactory)
-           .makeSingleValueDimensionSelector(any());
     StringAnyVectorAggregator aggregator = 
target.factorizeVector(vectorSelectorFactory);
     Assert.assertNotNull(aggregator);
   }
 
   @Test
   public void 
factorizeVectorWithUnknownCapabilitiesShouldReturnAggregatorWithMultiDimensionSelector()
   {
-    Mockito.doReturn(multiValueDimensionVectorSelector)
-           .when(vectorSelectorFactory)
-           .makeMultiValueDimensionSelector(any());
     StringAnyVectorAggregator aggregator = 
target.factorizeVector(vectorSelectorFactory);
     Assert.assertNotNull(aggregator);
   }
 
   @Test
   public void 
factorizeVectorWithMultipleValuesCapabilitiesShouldReturnAggregatorWithMultiDimensionSelector()
   {
-    
Mockito.doReturn(ColumnCapabilities.Capable.TRUE).when(capabilities).hasMultipleValues();
-    Mockito.doReturn(multiValueDimensionVectorSelector)
-           .when(vectorSelectorFactory)
-           .makeMultiValueDimensionSelector(any());
     StringAnyVectorAggregator aggregator = 
target.factorizeVector(vectorSelectorFactory);
     Assert.assertNotNull(aggregator);
   }
 
   @Test
   public void 
factorizeVectorWithoutMultipleValuesCapabilitiesShouldReturnAggregatorWithSingleDimensionSelector()
   {
-    
Mockito.doReturn(ColumnCapabilities.Capable.FALSE).when(capabilities).hasMultipleValues();
-    Mockito.doReturn(singleValueDimensionVectorSelector)
-           .when(vectorSelectorFactory)
-           .makeSingleValueDimensionSelector(any());
     StringAnyVectorAggregator aggregator = 
target.factorizeVector(vectorSelectorFactory);
     Assert.assertNotNull(aggregator);
   }
+
+  @Test
+  public void testFactorize()
+  {
+    Aggregator res = target.factorize(new TestColumnSelectorFactory());
+    Assert.assertTrue(res instanceof StringAnyAggregator);
+    res.aggregate();
+    Assert.assertEquals(null, res.get());
+    StringAnyVectorAggregator vectorAggregator = 
target.factorizeVector(vectorSelectorFactory);
+    Assert.assertTrue(vectorAggregator.aggregateMultipleValues);
+  }
+
+  @Test
+  public void testSvdStringAnyAggregator()
+  {
+    TestColumnSelectorFactory columnSelectorFactory = new 
TestColumnSelectorFactory();
+    Aggregator res = target.factorize(columnSelectorFactory);
+    Assert.assertTrue(res instanceof StringAnyAggregator);
+    columnSelectorFactory.moveSelectorCursorToNext();
+    res.aggregate();
+    Assert.assertEquals("CCCC", res.get());
+  }
+
+  @Test
+  public void testMvdStringAnyAggregator()
+  {
+    TestColumnSelectorFactory columnSelectorFactory = new 
TestColumnSelectorFactory();
+    Aggregator res = target.factorize(columnSelectorFactory);
+    Assert.assertTrue(res instanceof StringAnyAggregator);
+    columnSelectorFactory.moveSelectorCursorToNext();
+    columnSelectorFactory.moveSelectorCursorToNext();
+    res.aggregate();

Review Comment:
   why does it increment cursor twice before aggregating?



##########
sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java:
##########
@@ -158,7 +162,8 @@ AggregatorFactory createAggregatorFactory(
           case DOUBLE:
             return new DoubleAnyAggregatorFactory(name, fieldName);
           case STRING:
-            return new StringAnyAggregatorFactory(name, fieldName, 
maxStringBytes);
+          case COMPLEX:
+            return new StringAnyAggregatorFactory(name, fieldName, 
maxStringBytes, aggregateMultipleValues);

Review Comment:
   why would complex map to string? I think this means any type of complex 
would use the string aggregator which doesn't seem correct to me



##########
processing/src/main/java/org/apache/druid/query/aggregation/any/StringAnyAggregatorFactory.java:
##########
@@ -49,12 +49,14 @@ public class StringAnyAggregatorFactory extends 
AggregatorFactory
   private final String fieldName;
   private final String name;
   protected final int maxStringBytes;
+  protected final boolean aggregateMultipleValues;

Review Comment:
   why protected? (same applies to `maxStringBytes` too i think...) 



##########
processing/src/main/java/org/apache/druid/query/aggregation/any/StringAnyAggregator.java:
##########
@@ -24,38 +24,45 @@
 import org.apache.druid.segment.BaseObjectColumnValueSelector;
 import org.apache.druid.segment.DimensionHandlerUtils;
 
+import java.util.List;
+
 public class StringAnyAggregator implements Aggregator
 {
   private final BaseObjectColumnValueSelector valueSelector;
   private final int maxStringBytes;
   private boolean isFound;
   private String foundValue;
+  final boolean aggregateMultipleValues;

Review Comment:
   nit: private



##########
sql/src/main/java/org/apache/druid/sql/calcite/aggregation/builtin/EarliestLatestAnySqlAggregator.java:
##########
@@ -274,13 +299,31 @@ public Aggregation toDruidAggregation(
             args.size()
         );
     }
-
     return Aggregation.create(
         Collections.singletonList(theAggFactory),
         finalizeAggregations ? new FinalizingFieldAccessPostAggregator(name, 
aggregatorName) : null
     );
   }
 
+  private Integer getMaxStringBytes(
+      final List<RexNode> rexNodes,
+      final AggregateCall aggregateCall,
+      final PlannerContext plannerContext

Review Comment:
   this method seems strange to pass `List<RexNode>`, and only uses 
`rexNodes.get(1)` from it, please pass the `RexNode` to check directly



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to