This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 0c55422d8b Enhance tests for string type MV Raw columns (#9041)
0c55422d8b is described below

commit 0c55422d8bb5ff0df3e7871c9b1a1e52be38aecc
Author: Sonam Mandal <[email protected]>
AuthorDate: Mon Jul 11 17:52:11 2022 -0700

    Enhance tests for string type MV Raw columns (#9041)
---
 .../pinot/queries/MultiValueRawQueriesTest.java    | 333 ++++++++++++++++++++-
 .../MultiValueVarByteRawIndexCreatorTest.java      |  19 +-
 2 files changed, 329 insertions(+), 23 deletions(-)

diff --git 
a/pinot-core/src/test/java/org/apache/pinot/queries/MultiValueRawQueriesTest.java
 
b/pinot-core/src/test/java/org/apache/pinot/queries/MultiValueRawQueriesTest.java
index cb06de32db..26a0ed2f76 100644
--- 
a/pinot-core/src/test/java/org/apache/pinot/queries/MultiValueRawQueriesTest.java
+++ 
b/pinot-core/src/test/java/org/apache/pinot/queries/MultiValueRawQueriesTest.java
@@ -21,10 +21,13 @@ package org.apache.pinot.queries;
 import java.io.File;
 import java.util.ArrayList;
 import java.util.Arrays;
+import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
+import java.util.Random;
 import java.util.Set;
 import org.apache.commons.io.FileUtils;
+import org.apache.commons.lang3.RandomStringUtils;
 import org.apache.pinot.common.response.broker.BrokerResponseNative;
 import org.apache.pinot.common.response.broker.ResultTable;
 import org.apache.pinot.common.utils.DataSchema;
@@ -47,6 +50,7 @@ import org.testng.annotations.Test;
 
 import static org.testng.Assert.assertEquals;
 import static org.testng.Assert.assertNotNull;
+import static org.testng.Assert.assertNull;
 import static org.testng.Assert.assertTrue;
 
 
@@ -69,11 +73,13 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
   private final static String MV_FLOAT_COL = "mvFloatCol";
   private final static String MV_DOUBLE_COL = "mvDoubleCol";
   private final static String MV_STRING_COL = "mvStringCol";
+  private final static String MV_STRING_COL_2 = "mvStringCol2";
   private final static String MV_RAW_INT_COL = "mvRawIntCol";
   private final static String MV_RAW_LONG_COL = "mvRawLongCol";
   private final static String MV_RAW_FLOAT_COL = "mvRawFloatCol";
   private final static String MV_RAW_DOUBLE_COL = "mvRawDoubleCol";
   private final static String MV_RAW_STRING_COL = "mvRawStringCol";
+  private final static String MV_RAW_STRING_COL_2 = "mvRawStringCol2";
 
   private static final Schema SCHEMA = new 
Schema.SchemaBuilder().setSchemaName(RAW_TABLE_NAME)
       .addSingleValueDimension(SV_INT_COL, FieldSpec.DataType.INT)
@@ -82,31 +88,44 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       .addMultiValueDimension(MV_FLOAT_COL, FieldSpec.DataType.FLOAT)
       .addMultiValueDimension(MV_DOUBLE_COL, FieldSpec.DataType.DOUBLE)
       .addMultiValueDimension(MV_STRING_COL, FieldSpec.DataType.STRING)
+      .addMultiValueDimension(MV_STRING_COL_2, FieldSpec.DataType.STRING)
       .addMultiValueDimension(MV_RAW_INT_COL, FieldSpec.DataType.INT)
       .addMultiValueDimension(MV_RAW_LONG_COL, FieldSpec.DataType.LONG)
       .addMultiValueDimension(MV_RAW_FLOAT_COL, FieldSpec.DataType.FLOAT)
       .addMultiValueDimension(MV_RAW_DOUBLE_COL, FieldSpec.DataType.DOUBLE)
       .addMultiValueDimension(MV_RAW_STRING_COL, FieldSpec.DataType.STRING)
+      .addMultiValueDimension(MV_RAW_STRING_COL_2, FieldSpec.DataType.STRING)
       .build();
 
   private static final DataSchema DATA_SCHEMA = new DataSchema(new 
String[]{"mvDoubleCol", "mvFloatCol", "mvIntCol",
-      "mvLongCol", "mvRawDoubleCol", "mvRawFloatCol", "mvRawIntCol", 
"mvRawLongCol", "mvRawStringCol", "mvStringCol",
-      "svIntCol"},
+      "mvLongCol", "mvRawDoubleCol", "mvRawFloatCol", "mvRawIntCol", 
"mvRawLongCol", "mvRawStringCol",
+      "mvRawStringCol2", "mvStringCol", "mvStringCol2", "svIntCol"},
       new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.DOUBLE_ARRAY, 
DataSchema.ColumnDataType.FLOAT_ARRAY,
           DataSchema.ColumnDataType.INT_ARRAY, 
DataSchema.ColumnDataType.LONG_ARRAY,
           DataSchema.ColumnDataType.DOUBLE_ARRAY, 
DataSchema.ColumnDataType.FLOAT_ARRAY,
           DataSchema.ColumnDataType.INT_ARRAY, 
DataSchema.ColumnDataType.LONG_ARRAY,
           DataSchema.ColumnDataType.STRING_ARRAY, 
DataSchema.ColumnDataType.STRING_ARRAY,
+          DataSchema.ColumnDataType.STRING_ARRAY, 
DataSchema.ColumnDataType.STRING_ARRAY,
           DataSchema.ColumnDataType.INT});
 
   private static final TableConfig TABLE = new 
TableConfigBuilder(TableType.OFFLINE).setTableName(RAW_TABLE_NAME)
       .setNoDictionaryColumns(
-          Arrays.asList(MV_RAW_INT_COL, MV_RAW_LONG_COL, MV_RAW_FLOAT_COL, 
MV_RAW_DOUBLE_COL, MV_RAW_STRING_COL))
+          Arrays.asList(MV_RAW_INT_COL, MV_RAW_LONG_COL, MV_RAW_FLOAT_COL, 
MV_RAW_DOUBLE_COL, MV_RAW_STRING_COL,
+              MV_RAW_STRING_COL_2))
       .build();
 
   private IndexSegment _indexSegment;
   private List<IndexSegment> _indexSegments;
 
+  private List<String> _sortedStringListOverall;
+
+  private final List<String> _stringList1 = new ArrayList<>();
+  private final List<String> _stringList2 = new ArrayList<>();
+  private final Set<String> _stringSet = new HashSet<>();
+  private final Set<String> _stringSet2 = new HashSet<>();
+
+  private final Random _random = new Random();
+
   @Override
   protected String getFilter() {
     return "";
@@ -131,6 +150,10 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
     ImmutableSegment segment2 = createSegment(generateRecords(BASE_VALUE_2), 
SEGMENT_NAME_2);
     _indexSegment = segment1;
     _indexSegments = Arrays.asList(segment1, segment2);
+
+    _sortedStringListOverall = new ArrayList<>(_stringSet);
+    _sortedStringListOverall.addAll(_stringSet2);
+    Collections.sort(_sortedStringListOverall);
   }
 
   @AfterClass
@@ -165,6 +188,16 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       record.putValue(MV_RAW_FLOAT_COL, mvValue);
       record.putValue(MV_RAW_DOUBLE_COL, mvValue);
       record.putValue(MV_RAW_STRING_COL, mvValue);
+
+      String stringVal = RandomStringUtils.randomAlphanumeric(10, 100);
+      String stringVal2 = RandomStringUtils.randomAlphanumeric(10, 100);
+      record.putValue(MV_STRING_COL_2, Arrays.asList(stringVal, stringVal2));
+      record.putValue(MV_RAW_STRING_COL_2, Arrays.asList(stringVal, 
stringVal2));
+      _stringSet.add(stringVal);
+      _stringSet2.add(stringVal2);
+      _stringList1.add(stringVal);
+      _stringList2.add(stringVal2);
+
       uniqueRecords.add(record);
     }
 
@@ -211,8 +244,8 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       Set<Integer> actualValuesSecond = new HashSet<>();
       for (int i = 0; i < 40; i++) {
         Object[] values = recordRows.get(i);
-        assertEquals(values.length, 11);
-        int svIntValue = (int) values[10];
+        assertEquals(values.length, 13);
+        int svIntValue = (int) values[12];
         int[] intValues = (int[]) values[2];
         assertEquals(intValues[1] - intValues[0], MV_OFFSET);
         assertEquals(svIntValue, intValues[0]);
@@ -243,12 +276,19 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
         assertEquals(doubleValues[1], doubleValuesRaw[1]);
 
         String[] stringValues = (String[]) values[8];
-        String[] stringValuesRaw = (String[]) values[9];
+        String[] stringValuesRaw = (String[]) values[10];
         assertEquals(Integer.parseInt(stringValues[0]), intValues[0]);
         assertEquals(Integer.parseInt(stringValues[1]), intValues[1]);
         assertEquals(stringValues[0], stringValuesRaw[0]);
         assertEquals(stringValues[1], stringValuesRaw[1]);
 
+        String[] stringValues2 = (String[]) values[9];
+        String[] stringValuesRaw2 = (String[]) values[11];
+        assertEquals(stringValues2[0], stringValuesRaw2[0]);
+        assertEquals(stringValues2[1], stringValuesRaw2[1]);
+        assertTrue(_stringSet.contains(stringValuesRaw2[0]));
+        assertTrue(_stringSet2.contains(stringValuesRaw2[1]));
+
         actualValuesFirst.add(intValues[0]);
         actualValuesSecond.add(intValues[1]);
       }
@@ -257,17 +297,18 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
     }
     {
       // Select some dict based MV and some raw MV columns. Validate that the 
values match for the corresponding rows
-      String query = "SELECT mvIntCol, mvDoubleCol, mvStringCol, mvRawIntCol, 
mvRawDoubleCol, mvRawStringCol, svIntCol "
-          + "from testTable ORDER BY svIntCol LIMIT 40";
+      String query = "SELECT mvIntCol, mvDoubleCol, mvStringCol, mvRawIntCol, 
mvRawDoubleCol, mvRawStringCol, "
+          + "svIntCol, mvStringCol2, mvRawStringCol2 from testTable ORDER BY 
svIntCol LIMIT 40";
       ResultTable resultTable = getBrokerResponse(query).getResultTable();
       assertNotNull(resultTable);
       DataSchema dataSchema = new DataSchema(new String[]{
-          "mvIntCol", "mvDoubleCol", "mvStringCol", "mvRawIntCol", 
"mvRawDoubleCol", "mvRawStringCol", "svIntCol"
+          "mvIntCol", "mvDoubleCol", "mvStringCol", "mvRawIntCol", 
"mvRawDoubleCol", "mvRawStringCol", "svIntCol",
+          "mvStringCol2", "mvRawStringCol2"
       }, new DataSchema.ColumnDataType[]{
           DataSchema.ColumnDataType.INT_ARRAY, 
DataSchema.ColumnDataType.DOUBLE_ARRAY,
           DataSchema.ColumnDataType.STRING_ARRAY, 
DataSchema.ColumnDataType.INT_ARRAY,
           DataSchema.ColumnDataType.DOUBLE_ARRAY, 
DataSchema.ColumnDataType.STRING_ARRAY,
-          DataSchema.ColumnDataType.INT
+          DataSchema.ColumnDataType.INT, 
DataSchema.ColumnDataType.STRING_ARRAY, DataSchema.ColumnDataType.STRING_ARRAY
       });
       assertEquals(resultTable.getDataSchema(), dataSchema);
       List<Object[]> recordRows = resultTable.getRows();
@@ -284,7 +325,7 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       Set<Integer> actualValuesSecond = new HashSet<>();
       for (int i = 0; i < 40; i++) {
         Object[] values = recordRows.get(i);
-        assertEquals(values.length, 7);
+        assertEquals(values.length, 9);
         int[] intValues = (int[]) values[0];
         assertEquals(intValues[1] - intValues[0], MV_OFFSET);
 
@@ -306,6 +347,13 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
         assertEquals(stringValues[0], stringValuesRaw[0]);
         assertEquals(stringValues[1], stringValuesRaw[1]);
 
+        String[] stringValues2 = (String[]) values[7];
+        String[] stringValuesRaw2 = (String[]) values[8];
+        assertEquals(stringValues2[0], stringValuesRaw2[0]);
+        assertEquals(stringValues2[1], stringValuesRaw2[1]);
+        assertTrue(_stringSet.contains(stringValuesRaw2[0]));
+        assertTrue(_stringSet2.contains(stringValuesRaw2[1]));
+
         assertEquals(intValues[0], (int) values[6]);
         assertEquals(intValuesRaw[0], (int) values[6]);
 
@@ -433,6 +481,44 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
         assertEquals((String) values[2], expectedStringValues[i]);
       }
     }
+    {
+      // Test a group by order by query on variable length string column and 
compare results with query on dict based
+      // variable length string column
+      String query1 = "SELECT mvRawStringCol2 from testTable GROUP BY 
mvRawStringCol2 ORDER BY mvRawStringCol2 "
+          + "LIMIT 20";
+      ResultTable resultTable1 = getBrokerResponse(query1).getResultTable();
+      assertNotNull(resultTable1);
+      DataSchema dataSchema1 = new DataSchema(new String[]{
+          "mvRawStringCol2"
+      }, new DataSchema.ColumnDataType[]{
+          DataSchema.ColumnDataType.STRING
+      });
+      assertEquals(resultTable1.getDataSchema(), dataSchema1);
+      List<Object[]> recordRows1 = resultTable1.getRows();
+      assertEquals(recordRows1.size(), 20);
+
+      String query2 = "SELECT mvStringCol2 from testTable GROUP BY 
mvStringCol2 ORDER BY mvStringCol2 LIMIT 20";
+      ResultTable resultTable2 = getBrokerResponse(query2).getResultTable();
+      assertNotNull(resultTable2);
+      DataSchema dataSchema2 = new DataSchema(new String[]{
+          "mvStringCol2"
+      }, new DataSchema.ColumnDataType[]{
+          DataSchema.ColumnDataType.STRING
+      });
+      assertEquals(resultTable2.getDataSchema(), dataSchema2);
+      List<Object[]> recordRows2 = resultTable2.getRows();
+      assertEquals(recordRows2.size(), 20);
+
+      for (int i = 0; i < 10; i++) {
+        Object[] values1 = recordRows1.get(i);
+        Object[] values2 = recordRows2.get(i);
+        assertEquals(values1.length, 1);
+        assertEquals(values2.length, 1);
+
+        assertEquals(values1[0], values2[0]);
+        assertEquals(values1[0], _sortedStringListOverall.get(i));
+      }
+    }
     {
       // Test a select with a VALUEIN transform function with group by
       String query = "SELECT VALUEIN(mvRawIntCol, '0') from testTable WHERE 
mvRawIntCol IN (0) GROUP BY "
@@ -642,7 +728,7 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       List<Object[]> recordRows = resultTable.getRows();
       assertEquals(recordRows.size(), 10);
 
-      for (int i = 0; i < 4; i++) {
+      for (int i = 0; i < 10; i++) {
         Object[] values = recordRows.get(i);
         assertEquals(values.length, 4);
         int[] intVal = (int[]) values[0];
@@ -697,7 +783,7 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       List<Object[]> recordRows = resultTable.getRows();
       assertEquals(recordRows.size(), 8);
 
-      for (int i = 0; i < 4; i++) {
+      for (int i = 0; i < 8; i++) {
         Object[] values = recordRows.get(i);
         assertEquals(values.length, 4);
         int[] intVal = (int[]) values[0];
@@ -733,7 +819,7 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       List<Object[]> recordRows = resultTable.getRows();
       assertEquals(recordRows.size(), 8);
 
-      for (int i = 0; i < 4; i++) {
+      for (int i = 0; i < 8; i++) {
         Object[] values = recordRows.get(i);
         assertEquals(values.length, 1);
         String[] stringVal = (String[]) values[0];
@@ -742,6 +828,53 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
             || Integer.parseInt(stringVal[0]) == 1101 || 
Integer.parseInt(stringVal[1]) == 1101);
       }
     }
+    {
+      // Test a select with filter IN query on the variable length string MV 
raw column identifier
+      int index1 = _random.nextInt(_stringList1.size());
+      int index2 = _random.nextInt(_stringList2.size());
+      while (index2 == index1) {
+        index2 = _random.nextInt(_stringList2.size());
+      }
+      String val1 = _stringList1.get(index1);
+      String val2 = _stringList2.get(index2);
+
+      String query1 = "SELECT mvRawStringCol2 from testTable where 
mvRawStringCol2 IN ('" + val1 + "', '" + val2
+          + "') LIMIT 10";
+      ResultTable resultTable1 = getBrokerResponse(query1).getResultTable();
+      assertNotNull(resultTable1);
+      DataSchema dataSchema1 = new DataSchema(new String[]{
+          "mvRawStringCol2"
+      }, new DataSchema.ColumnDataType[]{
+          DataSchema.ColumnDataType.STRING_ARRAY
+      });
+      assertEquals(resultTable1.getDataSchema(), dataSchema1);
+      List<Object[]> recordRows1 = resultTable1.getRows();
+      assertEquals(recordRows1.size(), 8);
+
+      String query2 = "SELECT mvStringCol2 from testTable where mvStringCol2 
IN ('" + val1 + "', '" + val2
+          + "') LIMIT 10";
+      ResultTable resultTable2 = getBrokerResponse(query2).getResultTable();
+      assertNotNull(resultTable2);
+      DataSchema dataSchema2 = new DataSchema(new String[]{
+          "mvStringCol2"
+      }, new DataSchema.ColumnDataType[]{
+          DataSchema.ColumnDataType.STRING_ARRAY
+      });
+      assertEquals(resultTable2.getDataSchema(), dataSchema2);
+      List<Object[]> recordRows2 = resultTable2.getRows();
+      assertEquals(recordRows2.size(), 8);
+
+      for (int i = 0; i < 8; i++) {
+        Object[] values1 = recordRows1.get(i);
+        Object[] values2 = recordRows2.get(i);
+        assertEquals(values1.length, 1);
+        assertEquals(values2.length, 1);
+        String[] stringVal1 = (String[]) values1[0];
+        String[] stringVal2 = (String[]) values2[0];
+        assertTrue(stringVal1[0].equals(val1) || stringVal1[1].equals(val2));
+        assertTrue(stringVal2[0].equals(val1) || stringVal2[1].equals(val2));
+      }
+    }
     {
       // Test a select with filter query on an arraylength transform function
       String query = "SELECT mvRawIntCol, mvRawDoubleCol, mvRawStringCol from 
testTable where "
@@ -938,6 +1071,48 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       });
       validateSimpleAggregateQueryResults(resultTable, dataSchema);
     }
+    {
+      // Aggregation on variable length string columns. Only countmv works for 
string columns with alpha-numeric
+      // characters. Other aggregations work for string columns if the strings 
are actually numeric strings.
+      String query = "SELECT COUNTMV(mvStringCol2), COUNTMV(mvRawStringCol2) 
from testTable";
+      ResultTable resultTable = getBrokerResponse(query).getResultTable();
+
+      DataSchema dataSchema = new DataSchema(new String[]{
+          "countmv(mvStringCol2)", "countmv(mvRawStringCol2)"
+      }, new DataSchema.ColumnDataType[]{
+          DataSchema.ColumnDataType.LONG, DataSchema.ColumnDataType.LONG
+      });
+      assertNotNull(resultTable);
+      assertEquals(resultTable.getDataSchema(), dataSchema);
+      List<Object[]> recordRows = resultTable.getRows();
+      assertEquals(recordRows.size(), 1);
+
+      Object[] values = recordRows.get(0);
+      assertEquals(values.length, 2);
+      long countInt = (long) values[0];
+      long countIntRaw = (long) values[1];
+      assertEquals(countInt, 160);
+      assertEquals(countInt, countIntRaw);
+    }
+    {
+      // Non-count aggregations on variable length alphanumeric string columns. SUMMV, AVGMV, MINMV and MAXMV only
+      // work when the strings are actually numeric, so each query below is expected to return no result table.
+      String query = "SELECT SUMMV(mvStringCol2), SUMMV(mvRawStringCol2) from 
testTable";
+      ResultTable resultTable = getBrokerResponse(query).getResultTable();
+      assertNull(resultTable);
+
+      query = "SELECT AVGMV(mvStringCol2), AVGMV(mvRawStringCol2) from 
testTable";
+      resultTable = getBrokerResponse(query).getResultTable();
+      assertNull(resultTable);
+
+      query = "SELECT MINMV(mvStringCol2), MINMV(mvRawStringCol2) from 
testTable";
+      resultTable = getBrokerResponse(query).getResultTable();
+      assertNull(resultTable);
+
+      query = "SELECT MAXMV(mvStringCol2), MAXMV(mvRawStringCol2) from 
testTable";
+      resultTable = getBrokerResponse(query).getResultTable();
+      assertNull(resultTable);
+    }
   }
 
   private void validateSimpleAggregateQueryResults(ResultTable resultTable, 
DataSchema expectedDataSchema) {
@@ -947,6 +1122,7 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
     assertEquals(recordRows.size(), 1);
 
     Object[] values = recordRows.get(0);
+    assertEquals(values.length, 10);
     long countInt = (long) values[0];
     long countIntRaw = (long) values[1];
     assertEquals(countInt, 160);
@@ -1072,6 +1248,29 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
         assertEquals((long) values[1], 8);
       }
     }
+    {
+      // Aggregation on a single variable length string column, group by on a 
single MV raw column
+      String query = "SELECT mvRawIntCol, COUNTMV(mvRawStringCol2) from 
testTable GROUP BY mvRawIntCol ORDER BY "
+          + "mvRawIntCol LIMIT 10";
+      ResultTable resultTable = getBrokerResponse(query).getResultTable();
+
+      DataSchema dataSchema = new DataSchema(new String[]{
+          "mvRawIntCol", "countmv(mvRawStringCol2)"
+      }, new DataSchema.ColumnDataType[]{
+          DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.LONG
+      });
+      assertNotNull(resultTable);
+      assertEquals(resultTable.getDataSchema(), dataSchema);
+      List<Object[]> recordRows = resultTable.getRows();
+      assertEquals(recordRows.size(), 10);
+
+      for (int i = 0; i < 10; i++) {
+        Object[] values = resultTable.getRows().get(i);
+        assertEquals(values.length, 2);
+        assertEquals((int) values[0], i);
+        assertEquals((long) values[1], 8);
+      }
+    }
     {
       // Aggregation on a single column, group by on 2 MV raw columns
       String query = "SELECT mvRawIntCol, mvRawDoubleCol, 
COUNTMV(mvRawLongCol) from testTable GROUP BY mvRawIntCol, "
@@ -1226,6 +1425,33 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       });
       validateAggregateWithGroupByQueryResults(resultTable, dataSchema, false);
     }
+    {
+      // Aggregation on variable length string columns with group by. Only 
count aggregations should work
+      String query = "SELECT COUNTMV(mvStringCol2), COUNTMV(mvRawStringCol2), 
svIntCol, mvRawIntCol from testTable "
+          + "GROUP BY svIntCol, mvRawIntCol ORDER BY svIntCol";
+      ResultTable resultTable = getBrokerResponse(query).getResultTable();
+
+      DataSchema dataSchema = new DataSchema(new String[]{
+          "countmv(mvStringCol2)", "countmv(mvRawStringCol2)", "svIntCol", 
"mvRawIntCol"
+      }, new DataSchema.ColumnDataType[]{
+          DataSchema.ColumnDataType.LONG, DataSchema.ColumnDataType.LONG, 
DataSchema.ColumnDataType.INT,
+          DataSchema.ColumnDataType.INT
+      });
+
+      assertNotNull(resultTable);
+      assertEquals(resultTable.getDataSchema(), dataSchema);
+      List<Object[]> recordRows = resultTable.getRows();
+      assertEquals(recordRows.size(), 10);
+
+      for (int i = 0; i < 10; i++) {
+        Object[] values = recordRows.get(i);
+        assertEquals(values.length, 4);
+        assertEquals(values[0], 8L);
+        assertEquals(values[1], 8L);
+        assertEquals(values[2], i / 2);
+        assertTrue((((int) values[3] - (int) values[2]) == 0) || (((int) 
values[3] - (int) values[2]) == 100));
+      }
+    }
     {
       // Aggregation on int columns with group by on 3 columns
       String query = "SELECT COUNTMV(mvIntCol), COUNTMV(mvRawIntCol), 
SUMMV(mvIntCol), SUMMV(mvRawIntCol), "
@@ -1314,7 +1540,7 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       // Aggregation on string columns with group by on 3 columns
       String query = "SELECT COUNTMV(mvStringCol), COUNTMV(mvRawStringCol), 
SUMMV(mvStringCol), SUMMV(mvRawStringCol), "
           + "MINMV(mvStringCol), MINMV(mvRawStringCol), MAXMV(mvStringCol), 
MAXMV(mvRawStringCol), AVGMV(mvStringCol), "
-          + "AVGMV(mvRawStringCol), svIntCol, mvIntCol, mvRawIntCol from 
testTable GROUP BY svIntCol, mvIntCol,"
+          + "AVGMV(mvRawStringCol), svIntCol, mvIntCol, mvRawIntCol from 
testTable GROUP BY svIntCol, mvIntCol, "
           + "mvRawIntCol ORDER BY svIntCol";
       ResultTable resultTable = getBrokerResponse(query).getResultTable();
 
@@ -1331,6 +1557,36 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       });
       validateAggregateWithGroupByQueryResults(resultTable, dataSchema, true);
     }
+    {
+      // Aggregation on variable length string columns with group by on 3 
columns. Only count aggregation should work.
+      String query = "SELECT COUNTMV(mvStringCol2), COUNTMV(mvRawStringCol2), 
svIntCol, mvIntCol, mvRawIntCol "
+          + "from testTable GROUP BY svIntCol, mvIntCol, mvRawIntCol ORDER BY 
svIntCol";
+      ResultTable resultTable = getBrokerResponse(query).getResultTable();
+
+      DataSchema dataSchema = new DataSchema(new String[]{
+          "countmv(mvStringCol2)", "countmv(mvRawStringCol2)", "svIntCol", 
"mvIntCol", "mvRawIntCol"
+      }, new DataSchema.ColumnDataType[]{
+          DataSchema.ColumnDataType.LONG, DataSchema.ColumnDataType.LONG, 
DataSchema.ColumnDataType.INT,
+          DataSchema.ColumnDataType.INT, DataSchema.ColumnDataType.INT
+      });
+
+      assertNotNull(resultTable);
+      assertEquals(resultTable.getDataSchema(), dataSchema);
+      List<Object[]> recordRows = resultTable.getRows();
+      assertEquals(recordRows.size(), 10);
+
+      int[] expectedSvIntValues = new int[]{0, 0, 0, 0, 1, 1, 1, 1, 2, 2};
+
+      for (int i = 0; i < 10; i++) {
+        Object[] values = recordRows.get(i);
+        assertEquals(values.length, 5);
+        assertEquals(values[0], 8L);
+        assertEquals(values[1], 8L);
+        assertEquals(values[2], expectedSvIntValues[i]);
+        assertTrue((((int) values[3] - (int) values[2]) == 0) || (((int) 
values[3] - (int) values[2]) == 100));
+        assertTrue((((int) values[4] - (int) values[2]) == 0) || (((int) 
values[4] - (int) values[2]) == 100));
+      }
+    }
     {
       // Aggregation on int columns with group by on 3 columns, two of them RAW
       String query = "SELECT COUNTMV(mvIntCol), COUNTMV(mvRawIntCol), 
SUMMV(mvIntCol), SUMMV(mvRawIntCol), "
@@ -1561,6 +1817,31 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       });
       validateAggregateWithGroupByOrderByQueryResults(resultTable, dataSchema);
     }
+    {
+      // Count aggregation on variable length string columns with group by and order by on an MV raw int column
+      String query = "SELECT COUNTMV(mvStringCol2), COUNTMV(mvRawStringCol2), 
mvRawIntCol from testTable GROUP BY "
+          + "mvRawIntCol ORDER BY mvRawIntCol";
+      ResultTable resultTable = getBrokerResponse(query).getResultTable();
+
+      DataSchema dataSchema = new DataSchema(new String[]{
+          "countmv(mvStringCol2)", "countmv(mvRawStringCol2)", "mvRawIntCol"
+      }, new DataSchema.ColumnDataType[]{
+          DataSchema.ColumnDataType.LONG, DataSchema.ColumnDataType.LONG, 
DataSchema.ColumnDataType.INT
+      });
+
+      assertNotNull(resultTable);
+      assertEquals(resultTable.getDataSchema(), dataSchema);
+      List<Object[]> recordRows = resultTable.getRows();
+      assertEquals(recordRows.size(), 10);
+
+      for (int i = 0; i < 10; i++) {
+        Object[] values = recordRows.get(i);
+        assertEquals(values.length, 3);
+        assertEquals(values[0], 8L);
+        assertEquals(values[1], 8L);
+        assertEquals(values[2], i);
+      }
+    }
     {
       // Aggregation on int columns with group by order by with order by agg
       String query = "SELECT COUNTMV(mvIntCol), COUNTMV(mvRawIntCol), 
SUMMV(mvIntCol), SUMMV(mvRawIntCol), "
@@ -1741,5 +2022,27 @@ public class MultiValueRawQueriesTest extends 
BaseQueriesTest {
       Object[] value = resultTable.getRows().get(0);
       assertEquals(value[0], 20.0);
     }
+    {
+      // Transform within aggregation for variable length raw String MV
+      int index1 = _random.nextInt(_stringList1.size());
+      int index2 = _random.nextInt(_stringList2.size());
+      while (index2 == index1) {
+        index2 = _random.nextInt(_stringList2.size());
+      }
+      String val1 = _stringList1.get(index1);
+      String val2 = _stringList2.get(index2);
+      String query = "SELECT COUNTMV(VALUEIN(mvRawStringCol2, '" + val1 + "', 
'" + val2 + "')) from testTable "
+          + "WHERE mvRawStringCol2 IN ('" + val1 + "', '" + val2 + "')";
+      ResultTable resultTable = getBrokerResponse(query).getResultTable();
+
+      DataSchema dataSchema = new DataSchema(new 
String[]{"countmv(valuein(mvRawStringCol2,'" + val1 + "','" + val2
+          + "'))"},
+          new DataSchema.ColumnDataType[]{DataSchema.ColumnDataType.LONG});
+      assertEquals(resultTable.getDataSchema(), dataSchema);
+
+      assertEquals(resultTable.getRows().size(), 1);
+      Object[] value = resultTable.getRows().get(0);
+      assertEquals(value[0], 8L);
+    }
   }
 }
diff --git 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/MultiValueVarByteRawIndexCreatorTest.java
 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/MultiValueVarByteRawIndexCreatorTest.java
index 1f40871bbd..1006245586 100644
--- 
a/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/MultiValueVarByteRawIndexCreatorTest.java
+++ 
b/pinot-segment-local/src/test/java/org/apache/pinot/segment/local/segment/index/creator/MultiValueVarByteRawIndexCreatorTest.java
@@ -27,6 +27,7 @@ import java.util.List;
 import java.util.Random;
 import java.util.UUID;
 import java.util.stream.IntStream;
+import java.util.stream.Stream;
 import org.apache.commons.io.FileUtils;
 import 
org.apache.pinot.segment.local.segment.creator.impl.fwd.MultiValueVarByteRawIndexCreator;
 import 
org.apache.pinot.segment.local.segment.index.readers.forward.ChunkReaderContext;
@@ -57,8 +58,10 @@ public class MultiValueVarByteRawIndexCreatorTest {
   public Object[][] params() {
     return Arrays.stream(ChunkCompressionType.values())
         .flatMap(chunkCompressionType -> IntStream.of(10, 15, 20, 1000).boxed()
-            .flatMap(maxLength -> IntStream.range(1, 20).map(i -> i * 2 - 
1).boxed()
-                .map(maxNumEntries -> new Object[]{chunkCompressionType, 
maxLength, maxNumEntries})))
+            .flatMap(useFullSize -> Stream.of(true, false)
+                .flatMap(maxLength -> IntStream.range(1, 20).map(i -> i * 2 - 
1).boxed()
+                    .map(maxNumEntries -> new Object[]{chunkCompressionType, 
useFullSize, maxLength,
+                        maxNumEntries}))))
         .toArray(Object[][]::new);
   }
 
@@ -83,7 +86,7 @@ public class MultiValueVarByteRawIndexCreatorTest {
   }
 
   @Test(dataProvider = "params")
-  public void testMVString(ChunkCompressionType compressionType, int 
maxLength, int maxNumEntries)
+  public void testMVString(ChunkCompressionType compressionType, int 
maxLength, boolean useFullSize, int maxNumEntries)
       throws IOException {
     String column = "testCol-" + UUID.randomUUID();
     int numDocs = 1000;
@@ -93,12 +96,12 @@ public class MultiValueVarByteRawIndexCreatorTest {
     int maxTotalLength = 0;
     int maxElements = 0;
     for (int i = 0; i < numDocs; i++) {
-      int numEntries = random.nextInt(maxNumEntries + 1);
+      int numEntries = useFullSize ? maxNumEntries : 
random.nextInt(maxNumEntries + 1);
       maxElements = Math.max(numEntries, maxElements);
       String[] values = new String[numEntries];
       int serializedLength = 0;
       for (int j = 0; j < numEntries; j++) {
-        int length = random.nextInt(maxLength);
+        int length = useFullSize ? maxLength : random.nextInt(maxLength + 1);
         serializedLength += length;
         char[] value = new char[length];
         Arrays.fill(value, 'b');
@@ -135,7 +138,7 @@ public class MultiValueVarByteRawIndexCreatorTest {
   }
 
   @Test(dataProvider = "params")
-  public void testMVBytes(ChunkCompressionType compressionType, int maxLength, 
int maxNumEntries)
+  public void testMVBytes(ChunkCompressionType compressionType, int maxLength, 
boolean useFullSize, int maxNumEntries)
       throws IOException {
     String column = "testCol-" + UUID.randomUUID();
     int numDocs = 1000;
@@ -145,12 +148,12 @@ public class MultiValueVarByteRawIndexCreatorTest {
     int maxTotalLength = 0;
     int maxElements = 0;
     for (int i = 0; i < numDocs; i++) {
-      int numEntries = random.nextInt(maxNumEntries);
+      int numEntries = useFullSize ? maxNumEntries : 
random.nextInt(maxNumEntries + 1);
       maxElements = Math.max(numEntries, maxElements);
       byte[][] values = new byte[numEntries][];
       int serializedLength = 0;
       for (int j = 0; j < numEntries; j++) {
-        int length = random.nextInt(maxLength);
+        int length = useFullSize ? maxLength : random.nextInt(maxLength + 1);
         serializedLength += length;
         byte[] value = new byte[length];
         Arrays.fill(value, (byte) 'b');


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to