Copilot commented on code in PR #17269: URL: https://github.com/apache/pinot/pull/17269#discussion_r3199106919
########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/IndexCombinationValidationTest.java: ########## @@ -0,0 +1,665 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.utils; + +import com.fasterxml.jackson.databind.node.ObjectNode; +import java.util.Arrays; +import java.util.List; +import org.apache.pinot.spi.config.table.FieldConfig; +import org.apache.pinot.spi.config.table.FieldConfig.CompressionCodec; +import org.apache.pinot.spi.config.table.FieldConfig.EncodingType; +import org.apache.pinot.spi.config.table.FieldConfig.IndexType; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.data.FieldSpec.DataType; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.utils.JsonUtils; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertTrue; +import static org.testng.Assert.fail; + + +/** + * Exhaustive tests for all valid and invalid combinations of: + * - Forward index encoding: DICTIONARY vs RAW + * - Dictionary: enabled (default), disabled (noDictionaryColumns), or explicit (indexes.dictionary) + * - Secondary indexes: inverted, FST, IFST, range, text, JSON, bloom filter + * - Column data types: STRING (SV), INT (SV), INT (MV), FLOAT (SV) + * + * The core invariant under test: any column that uses an index requiring a dictionary + * (inverted, FST, IFST) must have an explicit dictionary config when the forward index is raw. + */ +public class IndexCombinationValidationTest { + + private static final String TABLE_NAME = "testTable"; + private static final String STR_COL = "strCol"; // STRING SV + private static final String INT_COL = "intCol"; // INT SV + private static final String INT_MV_COL = "intMvCol"; // INT MV + private static final String FLOAT_COL = "floatCol"; // FLOAT SV + + private static final Schema SCHEMA = new Schema.SchemaBuilder() + .setSchemaName(TABLE_NAME) + .addSingleValueDimension(STR_COL, DataType.STRING) + .addSingleValueDimension(INT_COL, DataType.INT) + .addMultiValueDimension(INT_MV_COL, DataType.INT) + .addSingleValueDimension(FLOAT_COL, DataType.FLOAT) + .build(); + + // ----- helpers ----- + + /** Build a FieldConfig with an explicit dictionary node in the indexes section. */ + private static FieldConfig rawWithExplicitDict(String col, List<IndexType> indexes) { + ObjectNode indexesNode = JsonUtils.newObjectNode(); + indexesNode.set("dictionary", JsonUtils.newObjectNode()); + return new FieldConfig(col, EncodingType.RAW, null, indexes, null, null, indexesNode, null, null); + } + + /** Validate and expect success. */ + private static void assertValid(TableConfig tableConfig) { + try { + TableConfigUtils.validate(tableConfig, SCHEMA); + } catch (Exception e) { + fail("Expected validation to pass but got: " + e.getMessage(), e); + } + } + + /** Validate and expect failure containing the given message fragment. */ + private static void assertInvalid(TableConfig tableConfig, String errorFragment) { + try { + TableConfigUtils.validate(tableConfig, SCHEMA); + fail("Expected validation to fail with: " + errorFragment); + } catch (Exception e) { + assertTrue(e.getMessage() != null && e.getMessage().contains(errorFragment), + "Expected '" + errorFragment + "' in error but got: " + e.getMessage()); + } + } + + /// Validate and expect failure containing any one of the given message fragments. Used when the same misconfig is + /// caught by multiple validation checks and the order of checks (and hence which error surfaces) is not + /// guaranteed across releases. For example, "raw forward + inverted with no explicit dictionary" can be flagged + /// either by `validateExplicitDictionaryForRawForwardIndex` ("require a dictionary") or by the older legacy + /// check that ran from `InvertedIndexType.validate` ("without dictionary"); both reject the same scenario. + private static void assertInvalidContains(TableConfig tableConfig, String... errorFragments) { Review Comment: The header comment references `validateExplicitDictionaryForRawForwardIndex`, but there is no such method/symbol in the codebase under that name. It would be clearer to reference the concrete validator(s) that enforce this invariant (e.g., DictionaryIndexType config derivation and/or specific IndexType.validate checks), so future readers know what actually rejects the config. ########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/utils/TableConfigUtilsTest.java: ########## @@ -1514,21 +1514,52 @@ public void testValidateFieldConfig() { Map<String, String> streamConfigs = getStreamConfigs(); tableConfig = new TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME) .setTimeColumnName(TIME_COLUMN) - .setNoDictionaryColumns(Arrays.asList("intCol")) .setStreamConfigs(streamConfigs) .build(); try { // Enable forward index disabled flag for a column with inverted index index and disable dictionary Map<String, String> fieldConfigProperties = new HashMap<>(); fieldConfigProperties.put(FieldConfig.FORWARD_INDEX_DISABLED, Boolean.TRUE.toString()); - FieldConfig fieldConfig = - new FieldConfig("intCol", FieldConfig.EncodingType.RAW, FieldConfig.IndexType.INVERTED, null, null, null, - fieldConfigProperties); - tableConfig.setFieldConfigList(Arrays.asList(fieldConfig)); + ObjectNode indexes = JsonUtils.newObjectNode(); + indexes.set("dictionary", JsonUtils.newObjectNode()); + FieldConfig realtimeFieldConfig = + new FieldConfig("intCol", FieldConfig.EncodingType.RAW, null, Arrays.asList(FieldConfig.IndexType.INVERTED), + null, null, indexes, fieldConfigProperties, null); + tableConfig.setFieldConfigList(Arrays.asList(realtimeFieldConfig)); TableConfigUtils.validate(tableConfig, schema); } catch (Exception e) { assertEquals(e.getMessage(), "Cannot disable forward index for column: intCol, as the table type is REALTIME"); } + + // Validate that inverted index on RAW column without explicit dictionary fails validation. Either of two + // validations may fire first: the legacy "Cannot create inverted index ... without dictionary" check, or the + // new `validateExplicitDictionaryForRawForwardIndex` check that requires an explicit dictionary block. + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).build(); Review Comment: This comment refers to a `validateExplicitDictionaryForRawForwardIndex` check, but that method/symbol doesn't exist in the codebase (at least under that name). If the validation is now enforced indirectly (e.g., via DictionaryIndexType config derivation + index-type validate()), consider updating the comment to point at the actual validation mechanism to avoid confusion when the test fails. ########## pinot-segment-spi/src/main/java/org/apache/pinot/segment/spi/index/DictionaryIndexConfig.java: ########## @@ -148,7 +148,7 @@ public static boolean requiresDictionary(FieldSpec fieldSpec, FieldIndexConfigs private static <C extends IndexConfig> boolean requiresDictionaryBy(IndexType<C, ?, ?> indexType, FieldSpec fieldSpec, FieldIndexConfigs fieldIndexConfigs) { C config = fieldIndexConfigs.getConfig(indexType); - if (config == null || config.isDisabled()) { + if (config.isDisabled()) { return false; } return indexType.requiresDictionary(fieldSpec, config); Review Comment: `requiresDictionaryBy()` calls `config.isDisabled()` without a null check. `FieldIndexConfigs#getConfig()` can return `null` if an `IndexType` implementation (e.g., a plugin) returns `null` from `getDefaultConfig()`, and `shouldInvalidateOnDictionaryChange()` still defensively checks for `config == null`. Consider restoring the `config == null` guard here (or enforce non-null default configs at the SPI boundary) to avoid a potential NPE when iterating `IndexService.getAllIndexes()`. -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
