Copilot commented on code in PR #17659: URL: https://github.com/apache/pinot/pull/17659#discussion_r2892041935
########## pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/FilterMvTransformFunctionTest.java: ########## @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.transform.function; + +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.RequestContextUtils; +import org.apache.pinot.core.operator.transform.TransformResultMetadata; +import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.exception.BadQueryRequestException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + + +public class FilterMvTransformFunctionTest extends BaseTransformFunctionTest { + + @Test + public void testFilterMvTransformFunctionInt() { + String expressionStr = String.format("filterMv(%s, 'v > 5')", INT_MV_COLUMN); + ExpressionContext expression = RequestContextUtils.getExpression(expressionStr); + TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + assertTrue(transformFunction instanceof FilterMvTransformFunction); + assertEquals(transformFunction.getName(), FilterMvTransformFunction.FUNCTION_NAME); + TransformResultMetadata resultMetadata = transformFunction.getResultMetadata(); + assertEquals(resultMetadata.getDataType(), DataType.INT); + assertFalse(resultMetadata.isSingleValue()); + assertTrue(resultMetadata.hasDictionary()); + + int[][] dictIdsMV = transformFunction.transformToDictIdsMV(_projectionBlock); + int[][] intValuesMV = transformFunction.transformToIntValuesMV(_projectionBlock); + long[][] longValuesMV = transformFunction.transformToLongValuesMV(_projectionBlock); + float[][] floatValuesMV = transformFunction.transformToFloatValuesMV(_projectionBlock); + double[][] doubleValuesMV = transformFunction.transformToDoubleValuesMV(_projectionBlock); + String[][] stringValuesMV = transformFunction.transformToStringValuesMV(_projectionBlock); + + Dictionary dictionary = transformFunction.getDictionary(); + for (int i = 0; i < NUM_ROWS; i++) { + IntList expectedList = new IntArrayList(); + for (int value : _intMVValues[i]) { + if (value > 5) { + expectedList.add(value); + } + } + int[] expectedValues = expectedList.toIntArray(); + + int numValues = expectedValues.length; + assertEquals(dictIdsMV[i].length, numValues); + assertEquals(intValuesMV[i].length, numValues); + assertEquals(longValuesMV[i].length, numValues); + assertEquals(floatValuesMV[i].length, numValues); + assertEquals(doubleValuesMV[i].length, numValues); + assertEquals(stringValuesMV[i].length, numValues); + for (int j = 0; j < numValues; j++) { + int expected = expectedValues[j]; + assertEquals(dictIdsMV[i][j], dictionary.indexOf(expected)); + assertEquals(intValuesMV[i][j], expected); + assertEquals(longValuesMV[i][j], expected); + assertEquals(floatValuesMV[i][j], (float) expected); + assertEquals(doubleValuesMV[i][j], (double) expected); + assertEquals(stringValuesMV[i][j], Integer.toString(expected)); + } + } + } + + @Test(dataProvider = "testFilterMvTransformFunctionString") Review Comment: The DataProvider name and provider method name are identical to the test method name, which makes stack traces and test reports harder to read. Consider using a more descriptive provider name (e.g., `filterMvStringPredicates`) and keeping provider method names distinct from test method names. ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java: ########## @@ -727,6 +728,62 @@ public void testArraysOverlapWithSameColumn(boolean useMultiStageQueryEngine) assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asLong(), getCountStarResult()); } + @Test(dataProvider = "useBothQueryEngines") + public void testFilterMvLongArrayFilterValues(boolean useMultiStageQueryEngine) + throws Exception { + setUseMultiStageQueryEngine(useMultiStageQueryEngine); + String query = String.format("SELECT filterMv(%s, 'v > 1') FROM %s LIMIT 1", LONG_ARRAY_COLUMN, getTableName()); + JsonNode result = postQuery(query).get("resultTable"); + assertEquals(result.get("dataSchema").get("columnDataTypes").get(0).textValue(), "LONG_ARRAY"); + JsonNode rows = result.get("rows"); + assertEquals(rows.size(), 1); + JsonNode row = rows.get(0); + assertEquals(row.size(), 1); + JsonNode values = row.get(0); + assertEquals(values.size(), 2); + assertEquals(values.get(0).longValue(), 2L); + assertEquals(values.get(1).longValue(), 3L); + } + + @Test(dataProvider = "useBothQueryEngines") + public void testFilterMvLongArrayFilterPredicate(boolean useMultiStageQueryEngine) + throws Exception { + setUseMultiStageQueryEngine(useMultiStageQueryEngine); + String query = String.format("SELECT COUNT(*) FROM %s WHERE arrayLength(filterMv(%s, 'v > 1')) = 2", + getTableName(), LONG_ARRAY_COLUMN); + JsonNode jsonNode = postQuery(query); + assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asLong(), getCountStarResult()); + } + + @Test(dataProvider = "useBothQueryEngines") + public void testFilterMvRegexpLikeFilterValues(boolean useMultiStageQueryEngine) + throws Exception { + setUseMultiStageQueryEngine(useMultiStageQueryEngine); + String query = String.format("SELECT filterMv(%s, 'REGEXP_LIKE(v, ''^/api/.*'')') FROM %s LIMIT 1", + STRING_ARRAY_COLUMN, getTableName()); Review Comment: `LIMIT 1` without `ORDER BY` makes these tests dependent on whichever row the engine returns first, which can be non-deterministic and lead to flaky integration tests. Consider making the row selection deterministic (e.g., add an `ORDER BY <stableColumn>` or a `WHERE` clause that targets a known record) before asserting exact MV contents. ########## pinot-common/src/main/java/org/apache/pinot/common/function/TransformFunctionType.java: ########## @@ -169,6 +169,7 @@ public enum TransformFunctionType { // Special functions VALUE_IN("valueIn", ReturnTypes.ARG0, OperandTypes.variadic(SqlOperandCountRanges.from(2))), + FILTER_MV("filterMv", ReturnTypes.ARG0, OperandTypes.family(List.of(SqlTypeFamily.ANY, SqlTypeFamily.CHARACTER))), Review Comment: The operand type `SqlTypeFamily.ANY` for the first argument is overly permissive and allows type-validation to succeed for non-array operands at planning time (even though execution rejects SV inputs). To keep planning/type inference aligned with runtime behavior, restrict the first operand family to `ARRAY` (or the closest Calcite family representing Pinot MV/ARRAY types, consistent with other array functions). ```suggestion FILTER_MV("filterMv", ReturnTypes.ARG0, OperandTypes.family(List.of(SqlTypeFamily.ARRAY, SqlTypeFamily.CHARACTER))), ``` ########## pinot-integration-tests/src/test/java/org/apache/pinot/integration/tests/custom/ArrayTest.java: ########## @@ -727,6 +728,62 @@ public void testArraysOverlapWithSameColumn(boolean useMultiStageQueryEngine) assertEquals(jsonNode.get("resultTable").get("rows").get(0).get(0).asLong(), getCountStarResult()); } + @Test(dataProvider = "useBothQueryEngines") + public void testFilterMvLongArrayFilterValues(boolean useMultiStageQueryEngine) + throws Exception { + setUseMultiStageQueryEngine(useMultiStageQueryEngine); + String query = String.format("SELECT filterMv(%s, 'v > 1') FROM %s LIMIT 1", LONG_ARRAY_COLUMN, getTableName()); Review Comment: `LIMIT 1` without `ORDER BY` makes these tests dependent on whichever row the engine returns first, which can be non-deterministic and lead to flaky integration tests. Consider making the row selection deterministic (e.g., add an `ORDER BY <stableColumn>` or a `WHERE` clause that targets a known record) before asserting exact MV contents. ```suggestion String query = String.format( "SELECT filterMv(%s, 'v > 1') FROM %s ORDER BY filterMv(%s, 'v > 1') LIMIT 1", LONG_ARRAY_COLUMN, getTableName(), LONG_ARRAY_COLUMN); ``` ########## pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/FilterMvTransformFunctionTest.java: ########## @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.transform.function; + +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.RequestContextUtils; +import org.apache.pinot.core.operator.transform.TransformResultMetadata; +import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.exception.BadQueryRequestException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + + +public class FilterMvTransformFunctionTest extends BaseTransformFunctionTest { + + @Test + public void testFilterMvTransformFunctionInt() { + String expressionStr = String.format("filterMv(%s, 'v > 5')", INT_MV_COLUMN); + ExpressionContext expression = RequestContextUtils.getExpression(expressionStr); + TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + assertTrue(transformFunction instanceof FilterMvTransformFunction); + assertEquals(transformFunction.getName(), FilterMvTransformFunction.FUNCTION_NAME); + TransformResultMetadata resultMetadata = transformFunction.getResultMetadata(); + assertEquals(resultMetadata.getDataType(), DataType.INT); + assertFalse(resultMetadata.isSingleValue()); + assertTrue(resultMetadata.hasDictionary()); + + int[][] dictIdsMV = transformFunction.transformToDictIdsMV(_projectionBlock); + int[][] intValuesMV = transformFunction.transformToIntValuesMV(_projectionBlock); + long[][] longValuesMV = transformFunction.transformToLongValuesMV(_projectionBlock); + float[][] floatValuesMV = transformFunction.transformToFloatValuesMV(_projectionBlock); + double[][] doubleValuesMV = transformFunction.transformToDoubleValuesMV(_projectionBlock); + String[][] stringValuesMV = transformFunction.transformToStringValuesMV(_projectionBlock); + + Dictionary dictionary = transformFunction.getDictionary(); + for (int i = 0; i < NUM_ROWS; i++) { + IntList expectedList = new IntArrayList(); + for (int value : _intMVValues[i]) { + if (value > 5) { + expectedList.add(value); + } + } + int[] expectedValues = expectedList.toIntArray(); + + int numValues = expectedValues.length; + assertEquals(dictIdsMV[i].length, numValues); + assertEquals(intValuesMV[i].length, numValues); + assertEquals(longValuesMV[i].length, numValues); + assertEquals(floatValuesMV[i].length, numValues); + assertEquals(doubleValuesMV[i].length, numValues); + assertEquals(stringValuesMV[i].length, numValues); + for (int j = 0; j < numValues; j++) { + int expected = expectedValues[j]; + assertEquals(dictIdsMV[i][j], dictionary.indexOf(expected)); + assertEquals(intValuesMV[i][j], expected); + assertEquals(longValuesMV[i][j], expected); + assertEquals(floatValuesMV[i][j], (float) expected); + assertEquals(doubleValuesMV[i][j], (double) expected); + assertEquals(stringValuesMV[i][j], Integer.toString(expected)); + } + } + } + + @Test(dataProvider = "testFilterMvTransformFunctionString") + public void testFilterMvTransformFunctionString(String predicate, boolean expectMatch) { + String escaped = predicate.replace("'", "''"); + String expressionStr = String.format("filterMv(%s, '%s')", STRING_ALPHANUM_MV_COLUMN_2, escaped); + ExpressionContext expression = RequestContextUtils.getExpression(expressionStr); + TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + assertTrue(transformFunction instanceof FilterMvTransformFunction); + TransformResultMetadata resultMetadata = transformFunction.getResultMetadata(); + assertEquals(resultMetadata.getDataType(), DataType.STRING); + assertFalse(resultMetadata.isSingleValue()); + assertTrue(resultMetadata.hasDictionary()); + + String[][] stringValuesMV = transformFunction.transformToStringValuesMV(_projectionBlock); + Dictionary dictionary = transformFunction.getDictionary(); + for (int i = 0; i < NUM_ROWS; i++) { + int expectedLength = expectMatch ? _stringAlphaNumericMV2Values[i].length : 0; + assertEquals(stringValuesMV[i].length, expectedLength); + for (int j = 0; j < expectedLength; j++) { + assertEquals(stringValuesMV[i][j], "a"); + assertEquals(dictionary.indexOf(stringValuesMV[i][j]), dictionary.indexOf("a")); + } + } + } + + @DataProvider(name = "testFilterMvTransformFunctionString") + public Object[][] testFilterMvTransformFunctionString() { Review Comment: The DataProvider name and provider method name are identical to the test method name, which makes stack traces and test reports harder to read. Consider using a more descriptive provider name (e.g., `filterMvStringPredicates`) and keeping provider method names distinct from test method names. ########## pinot-core/src/test/java/org/apache/pinot/core/operator/transform/function/FilterMvTransformFunctionTest.java: ########## @@ -0,0 +1,137 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.core.operator.transform.function; + +import it.unimi.dsi.fastutil.ints.IntArrayList; +import it.unimi.dsi.fastutil.ints.IntList; +import org.apache.pinot.common.request.context.ExpressionContext; +import org.apache.pinot.common.request.context.RequestContextUtils; +import org.apache.pinot.core.operator.transform.TransformResultMetadata; +import org.apache.pinot.segment.spi.index.reader.Dictionary; +import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.exception.BadQueryRequestException; +import org.testng.annotations.DataProvider; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertTrue; + + +public class FilterMvTransformFunctionTest extends BaseTransformFunctionTest { + + @Test + public void testFilterMvTransformFunctionInt() { + String expressionStr = String.format("filterMv(%s, 'v > 5')", INT_MV_COLUMN); + ExpressionContext expression = RequestContextUtils.getExpression(expressionStr); + TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + assertTrue(transformFunction instanceof FilterMvTransformFunction); + assertEquals(transformFunction.getName(), FilterMvTransformFunction.FUNCTION_NAME); + TransformResultMetadata resultMetadata = transformFunction.getResultMetadata(); + assertEquals(resultMetadata.getDataType(), DataType.INT); + assertFalse(resultMetadata.isSingleValue()); + assertTrue(resultMetadata.hasDictionary()); + + int[][] dictIdsMV = transformFunction.transformToDictIdsMV(_projectionBlock); + int[][] intValuesMV = transformFunction.transformToIntValuesMV(_projectionBlock); + long[][] longValuesMV = transformFunction.transformToLongValuesMV(_projectionBlock); + float[][] floatValuesMV = transformFunction.transformToFloatValuesMV(_projectionBlock); + double[][] doubleValuesMV = transformFunction.transformToDoubleValuesMV(_projectionBlock); + String[][] stringValuesMV = transformFunction.transformToStringValuesMV(_projectionBlock); + + Dictionary dictionary = transformFunction.getDictionary(); + for (int i = 0; i < NUM_ROWS; i++) { + IntList expectedList = new IntArrayList(); + for (int value : _intMVValues[i]) { + if (value > 5) { + expectedList.add(value); + } + } + int[] expectedValues = expectedList.toIntArray(); + + int numValues = expectedValues.length; + assertEquals(dictIdsMV[i].length, numValues); + assertEquals(intValuesMV[i].length, numValues); + assertEquals(longValuesMV[i].length, numValues); + assertEquals(floatValuesMV[i].length, numValues); + assertEquals(doubleValuesMV[i].length, numValues); + assertEquals(stringValuesMV[i].length, numValues); + for (int j = 0; j < numValues; j++) { + int expected = expectedValues[j]; + assertEquals(dictIdsMV[i][j], dictionary.indexOf(expected)); + assertEquals(intValuesMV[i][j], expected); + assertEquals(longValuesMV[i][j], expected); + assertEquals(floatValuesMV[i][j], (float) expected); + assertEquals(doubleValuesMV[i][j], (double) expected); + assertEquals(stringValuesMV[i][j], Integer.toString(expected)); + } + } + } + + @Test(dataProvider = "testFilterMvTransformFunctionString") + public void testFilterMvTransformFunctionString(String predicate, boolean expectMatch) { + String escaped = predicate.replace("'", "''"); + String expressionStr = String.format("filterMv(%s, '%s')", STRING_ALPHANUM_MV_COLUMN_2, escaped); + ExpressionContext expression = RequestContextUtils.getExpression(expressionStr); + TransformFunction transformFunction = TransformFunctionFactory.get(expression, _dataSourceMap); + assertTrue(transformFunction instanceof FilterMvTransformFunction); + TransformResultMetadata resultMetadata = transformFunction.getResultMetadata(); + assertEquals(resultMetadata.getDataType(), DataType.STRING); + assertFalse(resultMetadata.isSingleValue()); + assertTrue(resultMetadata.hasDictionary()); + + String[][] stringValuesMV = transformFunction.transformToStringValuesMV(_projectionBlock); + Dictionary dictionary = transformFunction.getDictionary(); + for (int i = 0; i < NUM_ROWS; i++) { + int expectedLength = expectMatch ? _stringAlphaNumericMV2Values[i].length : 0; + assertEquals(stringValuesMV[i].length, expectedLength); + for (int j = 0; j < expectedLength; j++) { + assertEquals(stringValuesMV[i][j], "a"); + assertEquals(dictionary.indexOf(stringValuesMV[i][j]), dictionary.indexOf("a")); + } + } + } + + @DataProvider(name = "testFilterMvTransformFunctionString") + public Object[][] testFilterMvTransformFunctionString() { + return new Object[][]{ + new Object[]{"v = 'a'", true}, + new Object[]{"v = 'b'", false}, + new Object[]{"REGEXP_LIKE(v, '^a$')", true}, + new Object[]{"REGEXP_LIKE(v, '^b$')", false} + }; Review Comment: Test coverage for `filterMv` predicates looks incomplete relative to the supported semantics (e.g., `!=`, comparison ops, `IN`/`NOT IN`, `BETWEEN`, and boolean composition via `AND`/`OR`/`NOT`). Add unit tests that exercise these predicate types (and ideally at least one non-STRING type like `BYTES_ARRAY`) so new/modified branches in predicate parsing + evaluation are covered. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
