This is an automated email from the ASF dual-hosted git repository. haibow pushed a commit to branch master in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push: new 5548e79 Table indexing config validation (#6017) 5548e79 is described below commit 5548e791f9273ec9ffd74f237385c9e7a531f9f3 Author: icefury71 <chinmay.cere...@gmail.com> AuthorDate: Thu Sep 17 21:02:48 2020 -0700 Table indexing config validation (#6017) * Adding validation for table indexing config to check for valid column names * * Addressing review comments: adding validation for FieldConfigList. Adding javadocs * Bug fix in integration test regarding removal of existing column from schema * Adding validation for StarTreeIndexingConfig * Addressing review comments --- .../apache/pinot/core/util/TableConfigUtils.java | 114 ++++++++++++++- .../pinot/core/util/TableConfigUtilsTest.java | 154 +++++++++++++++++++++ ...ulls_default_column_test_missing_columns.schema | 4 +- .../spi/utils/builder/TableConfigBuilder.java | 15 ++ 4 files changed, 282 insertions(+), 5 deletions(-) diff --git a/pinot-core/src/main/java/org/apache/pinot/core/util/TableConfigUtils.java b/pinot-core/src/main/java/org/apache/pinot/core/util/TableConfigUtils.java index dfe93a3..c91c7f9 100644 --- a/pinot-core/src/main/java/org/apache/pinot/core/util/TableConfigUtils.java +++ b/pinot-core/src/main/java/org/apache/pinot/core/util/TableConfigUtils.java @@ -20,8 +20,10 @@ package org.apache.pinot.core.util; import com.google.common.base.Preconditions; import java.util.Collections; +import java.util.HashMap; import java.util.HashSet; import java.util.List; +import java.util.Map; import java.util.Set; import javax.annotation.Nullable; import org.apache.pinot.common.tier.TierFactory; @@ -29,8 +31,12 @@ import org.apache.pinot.common.utils.CommonConstants; import org.apache.pinot.common.utils.config.TagNameUtils; import org.apache.pinot.core.data.function.FunctionEvaluator; import org.apache.pinot.core.data.function.FunctionEvaluatorFactory; +import org.apache.pinot.core.startree.v2.AggregationFunctionColumnPair; +import org.apache.pinot.spi.config.table.FieldConfig; +import org.apache.pinot.spi.config.table.IndexingConfig; import org.apache.pinot.spi.config.table.IngestionConfig; import org.apache.pinot.spi.config.table.SegmentsValidationAndRetentionConfig; +import org.apache.pinot.spi.config.table.StarTreeIndexConfig; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; import org.apache.pinot.spi.config.table.TierConfig; @@ -55,8 +61,9 @@ public final class TableConfigUtils { * 1. Validation config * 2. IngestionConfig * 3. TierConfigs + * 4. Indexing config * - * TODO: Add more validations for each section (e.g. verify column names used in the indexing, validate conditions are met for aggregateMetrics etc) + * TODO: Add more validations for each section (e.g. validate conditions are met for aggregateMetrics) */ public static void validate(TableConfig tableConfig, @Nullable Schema schema) { if (tableConfig.getTableType() == TableType.REALTIME) { @@ -65,6 +72,8 @@ public final class TableConfigUtils { validateValidationConfig(tableConfig, schema); validateIngestionConfig(tableConfig.getIngestionConfig(), schema); validateTierConfigList(tableConfig.getTierConfigsList()); + validateIndexingConfig(tableConfig.getIndexingConfig(), schema); + validateFieldConfigList(tableConfig.getFieldConfigList(), schema); } /** @@ -75,8 +84,8 @@ public final class TableConfigUtils { */ public static void validateTableName(TableConfig tableConfig) { String tableName = tableConfig.getTableName(); - if (tableName.contains(".")) { - throw new IllegalStateException("Table name: '" + tableName + "' containing '.' is not allowed"); + if (tableName.contains(".") || tableName.contains(" ")) { + throw new IllegalStateException("Table name: '" + tableName + "' containing '.' or space is not allowed"); } } @@ -225,4 +234,103 @@ public final class TableConfigUtils { } } } + + /** + * Validates the Indexing Config + * Ensures that every referred column name exists in the corresponding schema + */ + private static void validateIndexingConfig(@Nullable IndexingConfig indexingConfig, @Nullable Schema schema) { + if (indexingConfig == null || schema == null) { + return; + } + Map<String, String> columnNameToConfigMap = new HashMap<>(); + + if (indexingConfig.getBloomFilterColumns() != null) { + for (String columnName : indexingConfig.getBloomFilterColumns()) { + columnNameToConfigMap.put(columnName, "Bloom Filter Config"); + } + } + if (indexingConfig.getInvertedIndexColumns() != null) { + for (String columnName : indexingConfig.getInvertedIndexColumns()) { + columnNameToConfigMap.put(columnName, "Inverted Index Config"); + } + } + if (indexingConfig.getNoDictionaryColumns() != null) { + for (String columnName : indexingConfig.getNoDictionaryColumns()) { + columnNameToConfigMap.put(columnName, "No Dictionary Column Config"); + } + } + if (indexingConfig.getOnHeapDictionaryColumns() != null) { + for (String columnName : indexingConfig.getOnHeapDictionaryColumns()) { + columnNameToConfigMap.put(columnName, "On Heap Dictionary Column Config"); + } + } + if (indexingConfig.getRangeIndexColumns() != null) { + for (String columnName : indexingConfig.getRangeIndexColumns()) { + columnNameToConfigMap.put(columnName, "Range Column Config"); + } + } + if (indexingConfig.getSortedColumn() != null) { + for (String columnName : indexingConfig.getSortedColumn()) { + columnNameToConfigMap.put(columnName, "Sorted Column Config"); + } + } + if (indexingConfig.getVarLengthDictionaryColumns() != null) { + for (String columnName : indexingConfig.getVarLengthDictionaryColumns()) { + columnNameToConfigMap.put(columnName, "Var Length Column Config"); + } + } + List<StarTreeIndexConfig> starTreeIndexConfigList = indexingConfig.getStarTreeIndexConfigs(); + if (starTreeIndexConfigList != null) { + for (StarTreeIndexConfig starTreeIndexConfig : starTreeIndexConfigList) { + // Dimension split order cannot be null + for (String columnName : starTreeIndexConfig.getDimensionsSplitOrder()) { + columnNameToConfigMap.put(columnName, "StarTreeIndex Config"); + } + // Function column pairs cannot be null + for (String functionColumnPair : starTreeIndexConfig.getFunctionColumnPairs()) { + AggregationFunctionColumnPair columnPair; + try { + columnPair = AggregationFunctionColumnPair.fromColumnName(functionColumnPair); + } catch (Exception e) { + throw new IllegalStateException("Invalid StarTreeIndex config: " + functionColumnPair + ". Must be" + + "in the form <Aggregation function>__<Column name>"); + } + String columnName = columnPair.getColumn(); + if (!columnName.equals(AggregationFunctionColumnPair.STAR)) { + columnNameToConfigMap.put(columnName, "StarTreeIndex Config"); + } + } + List<String> skipDimensionList = starTreeIndexConfig.getSkipStarNodeCreationForDimensions(); + if (skipDimensionList != null) { + for (String columnName : skipDimensionList) { + columnNameToConfigMap.put(columnName, "StarTreeIndex Config"); + } + } + } + } + + for (Map.Entry<String, String> entry : columnNameToConfigMap.entrySet()) { + String columnName = entry.getKey(); + String configName = entry.getValue(); + Preconditions.checkState(schema.getFieldSpecFor(columnName) != null, + "Column Name " + columnName + " defined in " + configName + " must be a valid column defined in the schema"); + } + } + + /** + * Validates the Field Config List in the given TableConfig + * Ensures that every referred column name exists in the corresponding schema + */ + private static void validateFieldConfigList(@Nullable List<FieldConfig> fieldConfigList, @Nullable Schema schema) { + if (fieldConfigList == null || schema == null) { + return; + } + + for (FieldConfig fieldConfig : fieldConfigList) { + String columnName = fieldConfig.getName(); + Preconditions.checkState(schema.getFieldSpecFor(columnName) != null, + "Column Name " + columnName + " defined in field config list must be a valid column defined in the schema"); + } + } } diff --git a/pinot-core/src/test/java/org/apache/pinot/core/util/TableConfigUtilsTest.java b/pinot-core/src/test/java/org/apache/pinot/core/util/TableConfigUtilsTest.java index 0634589..32e658e 100644 --- a/pinot-core/src/test/java/org/apache/pinot/core/util/TableConfigUtilsTest.java +++ b/pinot-core/src/test/java/org/apache/pinot/core/util/TableConfigUtilsTest.java @@ -19,9 +19,12 @@ package org.apache.pinot.core.util; import com.google.common.collect.Lists; +import java.util.Arrays; import java.util.Collections; import org.apache.pinot.common.tier.TierFactory; +import org.apache.pinot.spi.config.table.FieldConfig; import org.apache.pinot.spi.config.table.IngestionConfig; +import org.apache.pinot.spi.config.table.StarTreeIndexConfig; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableType; import org.apache.pinot.spi.config.table.TierConfig; @@ -456,4 +459,155 @@ public class TableConfigUtilsTest { // expected } } + + @Test + public void testTableName() { + String[] malformedTableName = {"test.table", "test table"}; + for (int i = 0; i < 2; i++) { + String tableName = malformedTableName[i]; + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(tableName).build(); + try { + TableConfigUtils.validateTableName(tableConfig); + Assert.fail("Should fail for malformed table name : " + tableName); + } catch (IllegalStateException e) { + // expected + } + } + } + + @Test + public void testValidateIndexingConfig() { + Schema schema = + new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).addSingleValueDimension("myCol", FieldSpec.DataType.STRING) + .build(); + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setBloomFilterColumns(Arrays.asList("myCol2")).build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid Bloom filter column name"); + } catch (Exception e) { + // expected + } + + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setInvertedIndexColumns(Arrays.asList("myCol2")).build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid Inverted Index column name"); + } catch (Exception e) { + // expected + } + + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setNoDictionaryColumns(Arrays.asList("myCol2")).build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid No Dictionary column name"); + } catch (Exception e) { + // expected + } + + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setOnHeapDictionaryColumns(Arrays.asList("myCol2")).build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid On Heap Dictionary column name"); + } catch (Exception e) { + // expected + } + + tableConfig = + new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setRangeIndexColumns(Arrays.asList("myCol2")) + .build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid Range Index column name"); + } catch (Exception e) { + // expected + } + + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setSortedColumn("myCol2").build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid Sorted column name"); + } catch (Exception e) { + // expected + } + + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setVarLengthDictionaryColumns(Arrays.asList("myCol2")).build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid Var Length Dictionary column name"); + } catch (Exception e) { + // expected + } + + // Although this config makes no sense, it should pass the validation phase + StarTreeIndexConfig starTreeIndexConfig = new StarTreeIndexConfig(Arrays.asList("myCol"), + Arrays.asList("myCol"), + Arrays.asList("SUM__myCol"), + 1); + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setStarTreeIndexConfigs(Arrays.asList(starTreeIndexConfig)) + .build(); + try { + TableConfigUtils.validate(tableConfig, schema); + } catch (Exception e) { + Assert.fail("Should fail for valid StarTreeIndex config column name"); + // expected + } + + starTreeIndexConfig = new StarTreeIndexConfig(Arrays.asList("myCol2"), + Arrays.asList("myCol"), + Arrays.asList("SUM__myCol"), + 1); + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setStarTreeIndexConfigs(Arrays.asList(starTreeIndexConfig)) + .build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid StarTreeIndex config column name in dimension split order"); + } catch (Exception e) { + // expected + } + + starTreeIndexConfig = new StarTreeIndexConfig(Arrays.asList("myCol"), + Arrays.asList("myCol2"), + Arrays.asList("SUM__myCol"), + 1); + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setStarTreeIndexConfigs(Arrays.asList(starTreeIndexConfig)) + .build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid StarTreeIndex config column name in skip star node for dimension"); + } catch (Exception e) { + // expected + } + + starTreeIndexConfig = new StarTreeIndexConfig(Arrays.asList("myCol"), + Arrays.asList("myCol"), + Arrays.asList("SUM__myCol2"), + 1); + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setStarTreeIndexConfigs(Arrays.asList(starTreeIndexConfig)) + .build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid StarTreeIndex config column name in function column pair"); + } catch (Exception e) { + // expected + } + + FieldConfig fieldConfig = new FieldConfig("myCol2", null, null, null); + tableConfig = new TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME) + .setFieldConfigList(Arrays.asList(fieldConfig)).build(); + try { + TableConfigUtils.validate(tableConfig, schema); + Assert.fail("Should fail for invalid column name in Field Config List"); + } catch (Exception e) { + // expected + } + } } diff --git a/pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_100k_subset_nonulls_default_column_test_missing_columns.schema b/pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_100k_subset_nonulls_default_column_test_missing_columns.schema index ba8d12f..e7e1776 100644 --- a/pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_100k_subset_nonulls_default_column_test_missing_columns.schema +++ b/pinot-integration-tests/src/test/resources/On_Time_On_Time_Performance_2014_100k_subset_nonulls_default_column_test_missing_columns.schema @@ -253,7 +253,7 @@ ], "metricFieldSpecs": [ { - "name": "ArrDel15", + "name": "ActualElapsedTime", "dataType": "INT" }, { @@ -316,4 +316,4 @@ "timeType": "DAYS" } } -} \ No newline at end of file +} diff --git a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java index 3891bf9..0e32a02 100644 --- a/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java +++ b/pinot-spi/src/main/java/org/apache/pinot/spi/utils/builder/TableConfigBuilder.java @@ -32,6 +32,7 @@ import org.apache.pinot.spi.config.table.ReplicaGroupStrategyConfig; import org.apache.pinot.spi.config.table.RoutingConfig; import org.apache.pinot.spi.config.table.SegmentPartitionConfig; import org.apache.pinot.spi.config.table.SegmentsValidationAndRetentionConfig; +import org.apache.pinot.spi.config.table.StarTreeIndexConfig; import org.apache.pinot.spi.config.table.TableConfig; import org.apache.pinot.spi.config.table.TableCustomConfig; import org.apache.pinot.spi.config.table.TableTaskConfig; @@ -89,6 +90,8 @@ public class TableConfigBuilder { private Map<String, String> _streamConfigs; private SegmentPartitionConfig _segmentPartitionConfig; private boolean _nullHandlingEnabled; + private List<String> _varLengthDictionaryColumns; + private List<StarTreeIndexConfig> _starTreeIndexConfigs; private TableCustomConfig _customConfig; private QuotaConfig _quotaConfig; @@ -247,6 +250,16 @@ public class TableConfigBuilder { return this; } + public TableConfigBuilder setVarLengthDictionaryColumns(List<String> varLengthDictionaryColumns) { + _varLengthDictionaryColumns = varLengthDictionaryColumns; + return this; + } + + public TableConfigBuilder setStarTreeIndexConfigs(List<StarTreeIndexConfig> starTreeIndexConfigs) { + _starTreeIndexConfigs = starTreeIndexConfigs; + return this; + } + public TableConfigBuilder setStreamConfigs(Map<String, String> streamConfigs) { Preconditions.checkState(_tableType == TableType.REALTIME); _streamConfigs = streamConfigs; @@ -358,6 +371,8 @@ public class TableConfigBuilder { indexingConfig.setStreamConfigs(_streamConfigs); indexingConfig.setSegmentPartitionConfig(_segmentPartitionConfig); indexingConfig.setNullHandlingEnabled(_nullHandlingEnabled); + indexingConfig.setVarLengthDictionaryColumns(_varLengthDictionaryColumns); + indexingConfig.setStarTreeIndexConfigs(_starTreeIndexConfigs); if (_customConfig == null) { _customConfig = new TableCustomConfig(null); --------------------------------------------------------------------- To unsubscribe, e-mail: commits-unsubscr...@pinot.apache.org For additional commands, e-mail: commits-h...@pinot.apache.org