gortiz commented on code in PR #16497:
URL: https://github.com/apache/pinot/pull/16497#discussion_r2256231899


##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunction.java:
##########
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+
+
+public class MaxStringAggregationFunction extends 
NullableSingleInputAggregationFunction<String, String> {
+
+  public MaxStringAggregationFunction(List<ExpressionContext> arguments, 
boolean nullHandlingEnabled) {
+    super(verifySingleArgument(arguments, "MAXSTRING"), nullHandlingEnabled);
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.MAXSTRING;
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return new ObjectAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, 
int maxCapacity) {
+    return new ObjectGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // Ignore null and "null" string literals
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        String currentMax = aggregationResultHolder.getResult();
+        // Update the currentMax if a larger string value is found
+        if (currentMax == null || value.compareTo(currentMax) > 0) {
+          aggregationResultHolder.setValue(value);
+        }
+      }
+    });
+  }
+
+  @Override
+  public void aggregateGroupBySV(int length, int[] groupKeyArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // For SV, "null" as a string literal can exist and needs to be handled
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        int groupKey = groupKeyArray[i];
+        String currentMax = groupByResultHolder.getResult(groupKey);
+        if (currentMax == null || "null".equals(currentMax) || 
value.compareTo(currentMax) > 0) {
+          groupByResultHolder.setValueForKey(groupKey, value);
+        }
+      }
+    });
+  }
+
+  @Override
+  public void aggregateGroupByMV(int length, int[][] groupKeysArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // For MV, "null" as a string literal can exist and needs to be handled
+        if (value == null || "null".equals(value)) {
+          continue;
+        }

Review Comment:
   Same here



##########
pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunctionTest.java:
##########
@@ -0,0 +1,287 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.RequestContextUtils;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import org.apache.pinot.queries.FluentQueryTest;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+import org.testng.annotations.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+
+public class MaxStringAggregationFunctionTest extends 
AbstractAggregationFunctionTest {
+
+  /**
+   * Helper method to create a FluentQueryTest builder for a table with a 
single String field.
+   * This is used to simulate the DataTypeScenario concept from numeric 
aggregation tests,
+   * but fixed for the STRING data type.
+   */
+  protected FluentQueryTest.DeclaringTable getDeclaringTable(boolean 
enableColumnBasedNullHandling) {
+    return FluentQueryTest.withBaseDir(_baseDir)
+        .givenTable(
+            new Schema.SchemaBuilder()
+                .setSchemaName("testTable")
+                
.setEnableColumnBasedNullHandling(enableColumnBasedNullHandling)
+                .addSingleValueDimension("myField", FieldSpec.DataType.STRING)
+                .build(), SINGLE_FIELD_TABLE_CONFIG);
+  }
+
+  @Test
+  public void testNumericColumnException() {
+    ExpressionContext expression = RequestContextUtils.getExpression("column");
+    MaxStringAggregationFunction function = new 
MaxStringAggregationFunction(Collections.singletonList(expression),
+        false);
+
+    AggregationResultHolder resultHolder = 
function.createAggregationResultHolder();
+    GroupByResultHolder groupByResultHolder = 
function.createGroupByResultHolder(10, 20);
+
+    Map<ExpressionContext, BlockValSet> blockValSetMap = new HashMap<>();
+    BlockValSet mockBlockValSet = mock(BlockValSet.class);
+    when(mockBlockValSet.getValueType()).thenReturn(FieldSpec.DataType.INT);
+    blockValSetMap.put(expression, mockBlockValSet);
+
+    // Test exception in aggregate method
+    try {
+      function.aggregate(10, resultHolder, blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+
+    // Test exception in aggregateGroupBySV method
+    try {
+      function.aggregateGroupBySV(10, new int[10], groupByResultHolder, 
blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }

Review Comment:
   Please create one test for each of these cases. In the past, we overused 
test methods that test too many things, but that is a bad practice. Instead, 
each test case should have its own test method, so that if a bug affects 
several cases, we get one error per failing test. In the current method, only 
the first failure would be reported.



##########
pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunctionTest.java:
##########
@@ -0,0 +1,287 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.RequestContextUtils;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import org.apache.pinot.queries.FluentQueryTest;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+import org.testng.annotations.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+
+public class MaxStringAggregationFunctionTest extends 
AbstractAggregationFunctionTest {
+
+  /**
+   * Helper method to create a FluentQueryTest builder for a table with a 
single String field.
+   * This is used to simulate the DataTypeScenario concept from numeric 
aggregation tests,
+   * but fixed for the STRING data type.
+   */
+  protected FluentQueryTest.DeclaringTable getDeclaringTable(boolean 
enableColumnBasedNullHandling) {
+    return FluentQueryTest.withBaseDir(_baseDir)
+        .givenTable(
+            new Schema.SchemaBuilder()
+                .setSchemaName("testTable")
+                
.setEnableColumnBasedNullHandling(enableColumnBasedNullHandling)
+                .addSingleValueDimension("myField", FieldSpec.DataType.STRING)
+                .build(), SINGLE_FIELD_TABLE_CONFIG);
+  }
+
+  @Test
+  public void testNumericColumnException() {
+    ExpressionContext expression = RequestContextUtils.getExpression("column");
+    MaxStringAggregationFunction function = new 
MaxStringAggregationFunction(Collections.singletonList(expression),
+        false);
+
+    AggregationResultHolder resultHolder = 
function.createAggregationResultHolder();
+    GroupByResultHolder groupByResultHolder = 
function.createGroupByResultHolder(10, 20);
+
+    Map<ExpressionContext, BlockValSet> blockValSetMap = new HashMap<>();
+    BlockValSet mockBlockValSet = mock(BlockValSet.class);
+    when(mockBlockValSet.getValueType()).thenReturn(FieldSpec.DataType.INT);
+    blockValSetMap.put(expression, mockBlockValSet);
+
+    // Test exception in aggregate method
+    try {
+      function.aggregate(10, resultHolder, blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+
+    // Test exception in aggregateGroupBySV method
+    try {
+      function.aggregateGroupBySV(10, new int[10], groupByResultHolder, 
blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+
+    // Test exception in aggregateGroupByMV method
+    try {
+      function.aggregateGroupByMV(10, new int[10][], groupByResultHolder, 
blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+  }
+
+  @Test
+  public void testFunctionBasics() {
+    ExpressionContext expression = RequestContextUtils.getExpression("column");
+    MaxStringAggregationFunction function = new 
MaxStringAggregationFunction(Collections.singletonList(expression),
+        false);
+
+    // Test function type
+    assertEquals(function.getType(), AggregationFunctionType.MAXSTRING);
+
+    // Test string comparisons
+    assertEquals(function.merge("apple", "banana"), "banana");
+    assertEquals(function.merge("banana", "apple"), "banana");
+    assertEquals(function.merge("", "apple"), "apple");
+    assertEquals(function.merge("apple", ""), "apple");
+
+    // Test null handling
+    assertEquals(function.merge("apple", null), "apple");
+    assertEquals(function.merge(null, "apple"), "apple");
+    assertNull(function.merge(null, null));

Review Comment:
   We could probably also add a test that verifies 
`assertEquals(function.merge("apple", "null"), "null")`



##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunction.java:
##########
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+
+
+public class MaxStringAggregationFunction extends 
NullableSingleInputAggregationFunction<String, String> {
+
+  public MaxStringAggregationFunction(List<ExpressionContext> arguments, 
boolean nullHandlingEnabled) {
+    super(verifySingleArgument(arguments, "MAXSTRING"), nullHandlingEnabled);
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.MAXSTRING;
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return new ObjectAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, 
int maxCapacity) {
+    return new ObjectGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // Ignore null and "null" string literals
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        String currentMax = aggregationResultHolder.getResult();
+        // Update the currentMax if a larger string value is found
+        if (currentMax == null || value.compareTo(currentMax) > 0) {
+          aggregationResultHolder.setValue(value);
+        }

Review Comment:
   nit: It would probably be better to use the same pattern used in 
`MaxAggregationFunction`, where we don't call 
`aggregationResultHolder.setValue` on each iteration, but instead return the 
local maximum and then compare it with the aggregation result holder.



##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunction.java:
##########
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+
+
+public class MaxStringAggregationFunction extends 
NullableSingleInputAggregationFunction<String, String> {
+
+  public MaxStringAggregationFunction(List<ExpressionContext> arguments, 
boolean nullHandlingEnabled) {
+    super(verifySingleArgument(arguments, "MAXSTRING"), nullHandlingEnabled);
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.MAXSTRING;
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return new ObjectAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, 
int maxCapacity) {
+    return new ObjectGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // Ignore null and "null" string literals
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        String currentMax = aggregationResultHolder.getResult();
+        // Update the currentMax if a larger string value is found
+        if (currentMax == null || value.compareTo(currentMax) > 0) {
+          aggregationResultHolder.setValue(value);
+        }
+      }
+    });
+  }
+
+  @Override
+  public void aggregateGroupBySV(int length, int[] groupKeyArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // For SV, "null" as a string literal can exist and needs to be handled
+        if (value == null || "null".equals(value)) {
+          continue;
+        }

Review Comment:
   same here, we should skip that



##########
pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunctionTest.java:
##########
@@ -0,0 +1,287 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.RequestContextUtils;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import org.apache.pinot.queries.FluentQueryTest;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+import org.testng.annotations.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+
+public class MaxStringAggregationFunctionTest extends 
AbstractAggregationFunctionTest {
+
+  /**
+   * Helper method to create a FluentQueryTest builder for a table with a 
single String field.
+   * This is used to simulate the DataTypeScenario concept from numeric 
aggregation tests,
+   * but fixed for the STRING data type.
+   */
+  protected FluentQueryTest.DeclaringTable getDeclaringTable(boolean 
enableColumnBasedNullHandling) {
+    return FluentQueryTest.withBaseDir(_baseDir)
+        .givenTable(
+            new Schema.SchemaBuilder()
+                .setSchemaName("testTable")
+                
.setEnableColumnBasedNullHandling(enableColumnBasedNullHandling)
+                .addSingleValueDimension("myField", FieldSpec.DataType.STRING)
+                .build(), SINGLE_FIELD_TABLE_CONFIG);
+  }
+
+  @Test
+  public void testNumericColumnException() {
+    ExpressionContext expression = RequestContextUtils.getExpression("column");
+    MaxStringAggregationFunction function = new 
MaxStringAggregationFunction(Collections.singletonList(expression),
+        false);
+
+    AggregationResultHolder resultHolder = 
function.createAggregationResultHolder();
+    GroupByResultHolder groupByResultHolder = 
function.createGroupByResultHolder(10, 20);
+
+    Map<ExpressionContext, BlockValSet> blockValSetMap = new HashMap<>();
+    BlockValSet mockBlockValSet = mock(BlockValSet.class);
+    when(mockBlockValSet.getValueType()).thenReturn(FieldSpec.DataType.INT);
+    blockValSetMap.put(expression, mockBlockValSet);
+
+    // Test exception in aggregate method
+    try {
+      function.aggregate(10, resultHolder, blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+
+    // Test exception in aggregateGroupBySV method
+    try {
+      function.aggregateGroupBySV(10, new int[10], groupByResultHolder, 
blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+
+    // Test exception in aggregateGroupByMV method
+    try {
+      function.aggregateGroupByMV(10, new int[10][], groupByResultHolder, 
blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+  }
+
+  @Test
+  public void testFunctionBasics() {
+    ExpressionContext expression = RequestContextUtils.getExpression("column");
+    MaxStringAggregationFunction function = new 
MaxStringAggregationFunction(Collections.singletonList(expression),
+        false);
+
+    // Test function type
+    assertEquals(function.getType(), AggregationFunctionType.MAXSTRING);
+
+    // Test string comparisons
+    assertEquals(function.merge("apple", "banana"), "banana");
+    assertEquals(function.merge("banana", "apple"), "banana");
+    assertEquals(function.merge("", "apple"), "apple");
+    assertEquals(function.merge("apple", ""), "apple");
+
+    // Test null handling
+    assertEquals(function.merge("apple", null), "apple");
+    assertEquals(function.merge(null, "apple"), "apple");
+    assertNull(function.merge(null, null));
+
+    // Test final result merging
+    assertEquals(function.mergeFinalResult("apple", "banana"), "banana");
+  }
+
+  @Test
+  void aggregationAllNullsWithNullHandlingDisabled() {
+    // For MAXSTRING, when null handling is disabled, and all values are null,
+    // the result should be 'null' as there's no valid string to compare.
+    // This differs from numeric MAX/MIN which might return an initial default 
value.
+    getDeclaringTable(false) // nullHandlingEnabled = false
+        .onFirstInstance("myField",
+            "null",
+            "null"
+        ).andOnSecondInstance("myField",
+            "null"
+        ).whenQuery("select maxstring(myField) from testTable")
+        .thenResultIs("STRING", "\"null\""); // Asserting "null" as a string 
literal for the result
+  }
+
+  @Test
+  void aggregationAllNullsWithNullHandlingEnabled() {
+    // When null handling is enabled, and all values are null, the result 
should also be 'null'.
+    getDeclaringTable(true) // nullHandlingEnabled = true
+        .onFirstInstance("myField",
+            "null",
+            "null"
+        ).andOnSecondInstance("myField",
+            "null"
+        ).whenQuery("select maxstring(myField) from testTable")
+        .thenResultIs("STRING", "\"null\""); // Asserting "null" as a string 
literal for the result
+  }
+
+  @Test
+  void aggregationGroupBySVAllNullsWithNullHandlingDisabled() {
+    // For group by, if all values in a group are null and null handling is 
disabled,
+    // the group's result for MAXSTRING should be 'null'.
+    getDeclaringTable(false) // nullHandlingEnabled = false
+        .onFirstInstance("myField",
+            "null",
+            "null"
+        ).andOnSecondInstance("myField",
+            "null"
+        ).whenQuery("select 'literal', maxstring(myField) from testTable group 
by 'literal'")
+        // Expected "null" as a string literal for the aggregated column
+        .thenResultIs("STRING | STRING", "literal | \"null\"");
+  }
+
+  @Test
+  void aggregationGroupBySVAllNullsWithNullHandlingEnabled() {
+    // For group by, if all values in a group are null and null handling is 
enabled,
+    // the group's result for MAXSTRING should be 'null'.
+    getDeclaringTable(true) // nullHandlingEnabled = true
+        .onFirstInstance("myField",
+            "null",
+            "null"
+        ).andOnSecondInstance("myField",
+            "null"
+        ).whenQuery("select 'literal', maxstring(myField) from testTable group 
by 'literal'")
+        .thenResultIs("STRING | STRING", "literal | \"null\"");
+  }
+
+  @Test
+  void aggregationWithNullHandlingDisabled() {
+    // With null handling disabled, null values are effectively skipped, and 
the maximum non-null
+    // string should be found. The updated function handles this correctly.
+    getDeclaringTable(false) // nullHandlingEnabled = false
+        .onFirstInstance("myField",
+            "cat",
+            "null",
+            "apple"
+        ).andOnSecondInstance("myField",
+            "null",
+            "zebra",
+            "null"
+        ).whenQuery("select maxstring(myField) from testTable")
+        .thenResultIs("STRING", "zebra"); // Max of {"cat", "apple", "zebra"} 
is "zebra"
+  }
+
+  @Test
+  void aggregationWithNullHandlingEnabled() {
+    // With null handling enabled, null values are explicitly ignored, and the 
maximum non-null
+    // string should be found. The updated function handles this correctly.
+    getDeclaringTable(true) // nullHandlingEnabled = true
+        .onFirstInstance("myField",
+            "cat",
+            "null",
+            "apple"
+        ).andOnSecondInstance("myField",
+            "null",
+            "zebra",
+            "null"
+        ).whenQuery("select maxstring(myField) from testTable")
+        .thenResultIs("STRING", "zebra"); // Max of {"cat", "apple", "zebra"} 
is "zebra"
+  }
+
+  @Test
+  void aggregationGroupBySVWithNullHandlingDisabled() {
+    // Group By on a single value (SV) column with mixed nulls and non-nulls.
+    // Null handling disabled: nulls are ignored if there's at least one 
non-null value in the group.
+    // The updated function should now correctly find the max among non-nulls.
+    getDeclaringTable(false) // nullHandlingEnabled = false
+        .onFirstInstance("myField",
+            "alpha", // Grouped with 'literal'
+            "null",  // Grouped with 'literal'
+            "gamma"  // Grouped with 'literal'
+        ).andOnSecondInstance("myField",
+            "null",  // Grouped with 'literal'
+            "beta",  // Grouped with 'literal'
+            "null"   // Grouped with 'literal'
+        ).whenQuery("select 'literal', maxstring(myField) from testTable group 
by 'literal'")
+        .thenResultIs("STRING | STRING", "literal | gamma"); // Max of 
{"alpha", "gamma", "beta"} is "gamma"
+  }
+
+  @Test
+  void aggregationGroupBySVWithNullHandlingEnabled() {
+    // Group By on a single value (SV) column with mixed nulls and non-nulls.
+    // Null handling enabled: nulls are ignored.
+    // The updated function should now correctly find the max among non-nulls.
+    getDeclaringTable(true) // nullHandlingEnabled = true
+        .onFirstInstance("myField",
+            "alpha", // Grouped with 'literal'
+            "null",  // Grouped with 'literal'
+            "gamma"  // Grouped with 'literal'
+        ).andOnSecondInstance("myField",
+            "null",  // Grouped with 'literal'
+            "beta",  // Grouped with 'literal'
+            "null"   // Grouped with 'literal'
+        ).whenQuery("select 'literal', maxstring(myField) from testTable group 
by 'literal'")
+        .thenResultIs("STRING | STRING", "literal | gamma"); // Max of 
{"alpha", "gamma", "beta"} is "gamma"
+  }
+
+  @Test
+  void aggregationGroupByMV() {
+    FluentQueryTest.withBaseDir(_baseDir)
+        .givenTable(
+            new Schema.SchemaBuilder()
+                .setSchemaName("testTable")
+                .setEnableColumnBasedNullHandling(true) // Set at schema level 
for general behavior
+                .addMultiValueDimension("tags", FieldSpec.DataType.STRING) // 
Dimension for tags
+                .addDimensionField("value", FieldSpec.DataType.STRING)
+                .build(), SINGLE_FIELD_TABLE_CONFIG)
+        .onFirstInstance(
+            new Object[]{"tag1;tag2", "banana"}, // Row 1: tag1 -> "banana", 
tag2 -> "banana"
+            new Object[]{"tag2;tag3", null}      // Row 2: tag2 -> null, tag3 
-> null
+        )
+        .andOnSecondInstance(
+            new Object[]{"tag1;tag2", "apple"},   // Row 3: tag1 -> "apple", 
tag2 -> "apple"
+            new Object[]{"tag2;tag3", "cherry"}  // Row 4: tag2 -> "cherry", 
tag3 -> "cherry"
+        )
+        // Query without explicit null handling enabled via query option (uses 
table schema setting or default)
+        .whenQuery("select tags, MAXSTRING(value) from testTable group by tags 
order by tags")
+        .thenResultIs(
+            "STRING | STRING",
+            "tag1    | banana", // Values for tag1: "banana", "apple". Max is 
"banana".
+            "tag2    | cherry",
+            "tag3    | cherry"  // Values for tag3: null, "cherry". Max is 
"cherry".

Review Comment:
   This is incorrect. For the values `null` (which is read as `"null"` when null 
handling is disabled) and `"cherry"`, the max value is `"null"`.



##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MinStringAggregationFunction.java:
##########
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+
+
+public class MinStringAggregationFunction extends 
NullableSingleInputAggregationFunction<String, String> {
+
+  public MinStringAggregationFunction(List<ExpressionContext> arguments, 
boolean nullHandlingEnabled) {
+    super(verifySingleArgument(arguments, "MINSTRING"), nullHandlingEnabled);
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.MINSTRING;
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return new ObjectAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, 
int maxCapacity) {
+    return new ObjectGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MINSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // Ignore null and "null" string literals
+        if (value == null || "null".equals(value)) {
+          continue;
+        }

Review Comment:
   Same as in the Max function. I'm going to ignore this and the `"null"` problem 
in this class, as it is the same in both of them.



##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunction.java:
##########
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+
+
+public class MaxStringAggregationFunction extends 
NullableSingleInputAggregationFunction<String, String> {
+
+  public MaxStringAggregationFunction(List<ExpressionContext> arguments, 
boolean nullHandlingEnabled) {
+    super(verifySingleArgument(arguments, "MAXSTRING"), nullHandlingEnabled);
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.MAXSTRING;
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return new ObjectAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, 
int maxCapacity) {
+    return new ObjectGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // Ignore null and "null" string literals
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        String currentMax = aggregationResultHolder.getResult();
+        // Update the currentMax if a larger string value is found
+        if (currentMax == null || value.compareTo(currentMax) > 0) {
+          aggregationResultHolder.setValue(value);
+        }
+      }
+    });
+  }
+
+  @Override
+  public void aggregateGroupBySV(int length, int[] groupKeyArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // For SV, "null" as a string literal can exist and needs to be handled
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        int groupKey = groupKeyArray[i];
+        String currentMax = groupByResultHolder.getResult(groupKey);
+        if (currentMax == null || "null".equals(currentMax) || 
value.compareTo(currentMax) > 0) {
+          groupByResultHolder.setValueForKey(groupKey, value);
+        }
+      }
+    });
+  }
+
+  @Override
+  public void aggregateGroupByMV(int length, int[][] groupKeysArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // For MV, "null" as a string literal can exist and needs to be handled
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        for (int groupKey : groupKeysArray[i]) {
+          String currentMax = groupByResultHolder.getResult(groupKey);
+          if (currentMax == null || "null".equals(currentMax) || 
value.compareTo(currentMax) > 0) {
+            groupByResultHolder.setValueForKey(groupKey, value);
+          }
+        }
+      }
+    });
+  }
+
+  @Override
+  public String extractAggregationResult(AggregationResultHolder 
aggregationResultHolder) {
+    String result = aggregationResultHolder.getResult();
+    return result != null ? result : "null";
+  }
+
+  @Override
+  public String extractGroupByResult(GroupByResultHolder groupByResultHolder, 
int groupKey) {
+    String result = groupByResultHolder.getResult(groupKey);
+    return result != null ? result : "null";
+  }
+
+  @Override
+  public String merge(String intermediateResult1, String intermediateResult2) {
+    if (intermediateResult1 == null || "null".equals(intermediateResult1)) {
+      return intermediateResult2;
+    }
+    if (intermediateResult2 == null || "null".equals(intermediateResult2)) {
+      return intermediateResult1;
+    }

Review Comment:
   Same here. `"null"` is a valid value



##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunction.java:
##########
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+
+
+public class MaxStringAggregationFunction extends 
NullableSingleInputAggregationFunction<String, String> {
+
+  public MaxStringAggregationFunction(List<ExpressionContext> arguments, 
boolean nullHandlingEnabled) {
+    super(verifySingleArgument(arguments, "MAXSTRING"), nullHandlingEnabled);
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.MAXSTRING;
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return new ObjectAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, 
int maxCapacity) {
+    return new ObjectGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // Ignore null and "null" string literals
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        String currentMax = aggregationResultHolder.getResult();
+        // Update the currentMax if a larger string value is found
+        if (currentMax == null || value.compareTo(currentMax) > 0) {
+          aggregationResultHolder.setValue(value);
+        }
+      }
+    });
+  }
+
+  @Override
+  public void aggregateGroupBySV(int length, int[] groupKeyArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // For SV, "null" as a string literal can exist and needs to be handled
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        int groupKey = groupKeyArray[i];
+        String currentMax = groupByResultHolder.getResult(groupKey);
+        if (currentMax == null || "null".equals(currentMax) || 
value.compareTo(currentMax) > 0) {

Review Comment:
   We need to remove the `"null".equals(...)` here. `"null"` may be a valid value.



##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunction.java:
##########
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+
+
+public class MaxStringAggregationFunction extends 
NullableSingleInputAggregationFunction<String, String> {
+
+  public MaxStringAggregationFunction(List<ExpressionContext> arguments, 
boolean nullHandlingEnabled) {
+    super(verifySingleArgument(arguments, "MAXSTRING"), nullHandlingEnabled);
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.MAXSTRING;
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return new ObjectAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, 
int maxCapacity) {
+    return new ObjectGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // Ignore null and "null" string literals
+        if (value == null || "null".equals(value)) {

Review Comment:
   There are two issues here:
   1. You don't need to verify if the value is `null` or not. It cannot be null 
because that is an invariant of `forEachNotNull`.
   2. We cannot ignore the `"null"` literal, given that it can be a valid value 
for a dataset.
   
   I think the correct thing to do is to remove this `if`.



##########
pinot-core/src/main/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunction.java:
##########
@@ -0,0 +1,173 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.List;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.utils.DataSchema.ColumnDataType;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.ObjectAggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import 
org.apache.pinot.core.query.aggregation.groupby.ObjectGroupByResultHolder;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+
+
+public class MaxStringAggregationFunction extends 
NullableSingleInputAggregationFunction<String, String> {
+
+  public MaxStringAggregationFunction(List<ExpressionContext> arguments, 
boolean nullHandlingEnabled) {
+    super(verifySingleArgument(arguments, "MAXSTRING"), nullHandlingEnabled);
+  }
+
+  @Override
+  public AggregationFunctionType getType() {
+    return AggregationFunctionType.MAXSTRING;
+  }
+
+  @Override
+  public AggregationResultHolder createAggregationResultHolder() {
+    return new ObjectAggregationResultHolder();
+  }
+
+  @Override
+  public GroupByResultHolder createGroupByResultHolder(int initialCapacity, 
int maxCapacity) {
+    return new ObjectGroupByResultHolder(initialCapacity, maxCapacity);
+  }
+
+  @Override
+  public void aggregate(int length, AggregationResultHolder 
aggregationResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // Ignore null and "null" string literals
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        String currentMax = aggregationResultHolder.getResult();
+        // Update the currentMax if a larger string value is found
+        if (currentMax == null || value.compareTo(currentMax) > 0) {
+          aggregationResultHolder.setValue(value);
+        }
+      }
+    });
+  }
+
+  @Override
+  public void aggregateGroupBySV(int length, int[] groupKeyArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // For SV, "null" as a string literal can exist and needs to be handled
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        int groupKey = groupKeyArray[i];
+        String currentMax = groupByResultHolder.getResult(groupKey);
+        if (currentMax == null || "null".equals(currentMax) || 
value.compareTo(currentMax) > 0) {
+          groupByResultHolder.setValueForKey(groupKey, value);
+        }
+      }
+    });
+  }
+
+  @Override
+  public void aggregateGroupByMV(int length, int[][] groupKeysArray, 
GroupByResultHolder groupByResultHolder,
+      Map<ExpressionContext, BlockValSet> blockValSetMap) {
+    BlockValSet blockValSet = blockValSetMap.get(_expression);
+    if (blockValSet.getValueType().isNumeric()) {
+      throw new BadQueryRequestException("Cannot compute MAXSTRING for numeric 
column: "
+          + blockValSet.getValueType());
+    }
+    String[] stringValues = blockValSet.getStringValuesSV();
+    forEachNotNull(length, blockValSet, (from, to) -> {
+      for (int i = from; i < to; i++) {
+        String value = stringValues[i];
+        // For MV, "null" as a string literal can exist and needs to be handled
+        if (value == null || "null".equals(value)) {
+          continue;
+        }
+        for (int groupKey : groupKeysArray[i]) {
+          String currentMax = groupByResultHolder.getResult(groupKey);
+          if (currentMax == null || "null".equals(currentMax) || 
value.compareTo(currentMax) > 0) {

Review Comment:
   Same here



##########
pinot-core/src/test/java/org/apache/pinot/core/query/aggregation/function/MaxStringAggregationFunctionTest.java:
##########
@@ -0,0 +1,287 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.core.query.aggregation.function;
+
+import java.util.Collections;
+import java.util.HashMap;
+import java.util.Map;
+import org.apache.pinot.common.request.context.ExpressionContext;
+import org.apache.pinot.common.request.context.RequestContextUtils;
+import org.apache.pinot.core.common.BlockValSet;
+import org.apache.pinot.core.query.aggregation.AggregationResultHolder;
+import org.apache.pinot.core.query.aggregation.groupby.GroupByResultHolder;
+import org.apache.pinot.queries.FluentQueryTest;
+import org.apache.pinot.segment.spi.AggregationFunctionType;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.exception.BadQueryRequestException;
+import org.testng.annotations.Test;
+
+import static org.mockito.Mockito.mock;
+import static org.mockito.Mockito.when;
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+import static org.testng.Assert.fail;
+
+
+public class MaxStringAggregationFunctionTest extends 
AbstractAggregationFunctionTest {
+
+  /**
+   * Helper method to create a FluentQueryTest builder for a table with a 
single String field.
+   * This is used to simulate the DataTypeScenario concept from numeric 
aggregation tests,
+   * but fixed for the STRING data type.
+   */
+  protected FluentQueryTest.DeclaringTable getDeclaringTable(boolean 
enableColumnBasedNullHandling) {
+    return FluentQueryTest.withBaseDir(_baseDir)
+        .givenTable(
+            new Schema.SchemaBuilder()
+                .setSchemaName("testTable")
+                
.setEnableColumnBasedNullHandling(enableColumnBasedNullHandling)
+                .addSingleValueDimension("myField", FieldSpec.DataType.STRING)
+                .build(), SINGLE_FIELD_TABLE_CONFIG);
+  }
+
+  @Test
+  public void testNumericColumnException() {
+    ExpressionContext expression = RequestContextUtils.getExpression("column");
+    MaxStringAggregationFunction function = new 
MaxStringAggregationFunction(Collections.singletonList(expression),
+        false);
+
+    AggregationResultHolder resultHolder = 
function.createAggregationResultHolder();
+    GroupByResultHolder groupByResultHolder = 
function.createGroupByResultHolder(10, 20);
+
+    Map<ExpressionContext, BlockValSet> blockValSetMap = new HashMap<>();
+    BlockValSet mockBlockValSet = mock(BlockValSet.class);
+    when(mockBlockValSet.getValueType()).thenReturn(FieldSpec.DataType.INT);
+    blockValSetMap.put(expression, mockBlockValSet);
+
+    // Test exception in aggregate method
+    try {
+      function.aggregate(10, resultHolder, blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+
+    // Test exception in aggregateGroupBySV method
+    try {
+      function.aggregateGroupBySV(10, new int[10], groupByResultHolder, 
blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+
+    // Test exception in aggregateGroupByMV method
+    try {
+      function.aggregateGroupByMV(10, new int[10][], groupByResultHolder, 
blockValSetMap);
+      fail("Should throw BadQueryRequestException");
+    } catch (BadQueryRequestException e) {
+      assertTrue(e.getMessage().contains("Cannot compute MAXSTRING for numeric 
column"));
+    }
+  }
+
+  @Test
+  public void testFunctionBasics() {
+    ExpressionContext expression = RequestContextUtils.getExpression("column");
+    MaxStringAggregationFunction function = new 
MaxStringAggregationFunction(Collections.singletonList(expression),
+        false);
+
+    // Test function type
+    assertEquals(function.getType(), AggregationFunctionType.MAXSTRING);
+
+    // Test string comparisons
+    assertEquals(function.merge("apple", "banana"), "banana");
+    assertEquals(function.merge("banana", "apple"), "banana");
+    assertEquals(function.merge("", "apple"), "apple");
+    assertEquals(function.merge("apple", ""), "apple");
+
+    // Test null handling
+    assertEquals(function.merge("apple", null), "apple");
+    assertEquals(function.merge(null, "apple"), "apple");
+    assertNull(function.merge(null, null));
+
+    // Test final result merging
+    assertEquals(function.mergeFinalResult("apple", "banana"), "banana");
+  }
+
+  @Test
+  void aggregationAllNullsWithNullHandlingDisabled() {
+    // For MAXSTRING, when null handling is disabled, and all values are null,
+    // the result should be 'null' as there's no valid string to compare.
+    // This differs from numeric MAX/MIN which might return an initial default 
value.
+    getDeclaringTable(false) // nullHandlingEnabled = false
+        .onFirstInstance("myField",
+            "null",
+            "null"
+        ).andOnSecondInstance("myField",
+            "null"
+        ).whenQuery("select maxstring(myField) from testTable")
+        .thenResultIs("STRING", "\"null\""); // Asserting "null" as a string 
literal for the result
+  }
+
+  @Test
+  void aggregationAllNullsWithNullHandlingEnabled() {
+    // When null handling is enabled, and all values are null, the result 
should also be 'null'.
+    getDeclaringTable(true) // nullHandlingEnabled = true
+        .onFirstInstance("myField",
+            "null",
+            "null"
+        ).andOnSecondInstance("myField",
+            "null"
+        ).whenQuery("select maxstring(myField) from testTable")
+        .thenResultIs("STRING", "\"null\""); // Asserting "null" as a string 
literal for the result
+  }
+
+  @Test
+  void aggregationGroupBySVAllNullsWithNullHandlingDisabled() {
+    // For group by, if all values in a group are null and null handling is 
disabled,
+    // the group's result for MAXSTRING should be 'null'.
+    getDeclaringTable(false) // nullHandlingEnabled = false
+        .onFirstInstance("myField",
+            "null",
+            "null"
+        ).andOnSecondInstance("myField",
+            "null"
+        ).whenQuery("select 'literal', maxstring(myField) from testTable group 
by 'literal'")
+        // Expected "null" as a string literal for the aggregated column
+        .thenResultIs("STRING | STRING", "literal | \"null\"");
+  }
+
+  @Test
+  void aggregationGroupBySVAllNullsWithNullHandlingEnabled() {
+    // For group by, if all values in a group are null and null handling is 
enabled,
+    // the group's result for MAXSTRING should be 'null'.
+    getDeclaringTable(true) // nullHandlingEnabled = true
+        .onFirstInstance("myField",
+            "null",
+            "null"
+        ).andOnSecondInstance("myField",
+            "null"
+        ).whenQuery("select 'literal', maxstring(myField) from testTable group 
by 'literal'")
+        .thenResultIs("STRING | STRING", "literal | \"null\"");
+  }
+
+  @Test
+  void aggregationWithNullHandlingDisabled() {
+    // With null handling disabled, null values are effectively skipped, and 
the maximum non-null
+    // string should be found. The updated function handles this correctly.
+    getDeclaringTable(false) // nullHandlingEnabled = false
+        .onFirstInstance("myField",
+            "cat",
+            "null",
+            "apple"
+        ).andOnSecondInstance("myField",
+            "null",
+            "zebra",
+            "null"
+        ).whenQuery("select maxstring(myField) from testTable")
+        .thenResultIs("STRING", "zebra"); // Max of {"cat", "apple", "zebra"} 
is "zebra"
+  }
+
+  @Test
+  void aggregationWithNullHandlingEnabled() {
+    // With null handling enabled, null values are explicitly ignored, and the 
maximum non-null
+    // string should be found. The updated function handles this correctly.
+    getDeclaringTable(true) // nullHandlingEnabled = true
+        .onFirstInstance("myField",
+            "cat",
+            "null",
+            "apple"
+        ).andOnSecondInstance("myField",
+            "null",
+            "zebra",
+            "null"
+        ).whenQuery("select maxstring(myField) from testTable")
+        .thenResultIs("STRING", "zebra"); // Max of {"cat", "apple", "zebra"} 
is "zebra"
+  }
+
+  @Test
+  void aggregationGroupBySVWithNullHandlingDisabled() {
+    // Group By on a single value (SV) column with mixed nulls and non-nulls.
+    // Null handling disabled: nulls are ignored if there's at least one 
non-null value in the group.
+    // The updated function should now correctly find the max among non-nulls.
+    getDeclaringTable(false) // nullHandlingEnabled = false
+        .onFirstInstance("myField",
+            "alpha", // Grouped with 'literal'
+            "null",  // Grouped with 'literal'
+            "gamma"  // Grouped with 'literal'
+        ).andOnSecondInstance("myField",
+            "null",  // Grouped with 'literal'
+            "beta",  // Grouped with 'literal'
+            "null"   // Grouped with 'literal'
+        ).whenQuery("select 'literal', maxstring(myField) from testTable group 
by 'literal'")
+        .thenResultIs("STRING | STRING", "literal | gamma"); // Max of 
{"alpha", "gamma", "beta"} is "gamma"
+  }
+
+  @Test
+  void aggregationGroupBySVWithNullHandlingEnabled() {
+    // Group By on a single value (SV) column with mixed nulls and non-nulls.
+    // Null handling enabled: nulls are ignored.
+    // The updated function should now correctly find the max among non-nulls.
+    getDeclaringTable(true) // nullHandlingEnabled = true
+        .onFirstInstance("myField",
+            "alpha", // Grouped with 'literal'
+            "null",  // Grouped with 'literal'
+            "gamma"  // Grouped with 'literal'
+        ).andOnSecondInstance("myField",
+            "null",  // Grouped with 'literal'
+            "beta",  // Grouped with 'literal'
+            "null"   // Grouped with 'literal'
+        ).whenQuery("select 'literal', maxstring(myField) from testTable group 
by 'literal'")
+        .thenResultIs("STRING | STRING", "literal | gamma"); // Max of 
{"alpha", "gamma", "beta"} is "gamma"
+  }
+
+  @Test
+  void aggregationGroupByMV() {
+    FluentQueryTest.withBaseDir(_baseDir)
+        .givenTable(
+            new Schema.SchemaBuilder()
+                .setSchemaName("testTable")
+                .setEnableColumnBasedNullHandling(true) // Set at schema level 
for general behavior
+                .addMultiValueDimension("tags", FieldSpec.DataType.STRING) // 
Dimension for tags
+                .addDimensionField("value", FieldSpec.DataType.STRING)
+                .build(), SINGLE_FIELD_TABLE_CONFIG)
+        .onFirstInstance(
+            new Object[]{"tag1;tag2", "banana"}, // Row 1: tag1 -> "banana", 
tag2 -> "banana"
+            new Object[]{"tag2;tag3", null}      // Row 2: tag2 -> null, tag3 
-> null
+        )
+        .andOnSecondInstance(
+            new Object[]{"tag1;tag2", "apple"},   // Row 3: tag1 -> "apple", 
tag2 -> "apple"
+            new Object[]{"tag2;tag3", "cherry"}  // Row 4: tag2 -> "cherry", 
tag3 -> "cherry"
+        )
+        // Query without explicit null handling enabled via query option (uses 
table schema setting or default)
+        .whenQuery("select tags, MAXSTRING(value) from testTable group by tags 
order by tags")
+        .thenResultIs(
+            "STRING | STRING",
+            "tag1    | banana", // Values for tag1: "banana", "apple". Max is 
"banana".
+            "tag2    | cherry",

Review Comment:
   This expected result is incorrect.
   
   Values for tag2: "banana", null, "apple", "cherry". Max is "null".
   
   This is because when null handling is disabled, the null value is read as the 
   string "null", which sorts after "cherry" lexicographically, and we need to 
   honor that.



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to