Copilot commented on code in PR #17740: URL: https://github.com/apache/pinot/pull/17740#discussion_r2841431301
########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/columntransformer/TimeValidationColumnTransformerTest.java: ########## @@ -0,0 +1,228 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.columntransformer; + +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.config.table.ingestion.IngestionConfig; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + + +/** + * Tests for TimeValidationColumnTransformer. + */ +public class TimeValidationColumnTransformerTest { + + @Test + public void testIsNoOpWhenNoTimeColumn() { + Schema schema = new Schema.SchemaBuilder() + .addSingleValueDimension("col1", FieldSpec.DataType.INT) + .build(); + + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName("testTable") + .build(); + + TimeValidationColumnTransformer transformer = new TimeValidationColumnTransformer(tableConfig, schema); + assertTrue(transformer.isNoOp(), "Should be no-op when no time column configured"); + } + + @Test + public void testIsNoOpWhenTimeValueCheckDisabled() { + Schema schema = new Schema.SchemaBuilder() + .addDateTime("timeCol", FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS") + .build(); + + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName("testTable") + .setTimeColumnName("timeCol") + .build(); + // rowTimeValueCheck is false by default + + TimeValidationColumnTransformer transformer = new TimeValidationColumnTransformer(tableConfig, schema); + assertTrue(transformer.isNoOp(), "Should be no-op when time value check is disabled"); + } + + @Test + public void testIsNoOpWhenTimeValueCheckEnabled() { + Schema schema = new Schema.SchemaBuilder() + .addDateTime("timeCol", FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS") + .build(); + + IngestionConfig ingestionConfig = new IngestionConfig(); + ingestionConfig.setRowTimeValueCheck(true); + + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName("testTable") + .setTimeColumnName("timeCol") + .setIngestionConfig(ingestionConfig) + .build(); + + TimeValidationColumnTransformer transformer = new TimeValidationColumnTransformer(tableConfig, schema); + assertTrue(!transformer.isNoOp(), "Should not be no-op when time value check is enabled"); + } + + @Test + public void testTransformValidTime() { + Schema schema = new Schema.SchemaBuilder() + .addDateTime("timeCol", FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS") + .build(); + + IngestionConfig ingestionConfig = new IngestionConfig(); + ingestionConfig.setRowTimeValueCheck(true); + + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName("testTable") + .setTimeColumnName("timeCol") + .setIngestionConfig(ingestionConfig) + .build(); + + TimeValidationColumnTransformer transformer = new TimeValidationColumnTransformer(tableConfig, schema); + + // Valid time: current time + long validTime = System.currentTimeMillis(); + Object result = transformer.transform(validTime); + + assertEquals(result, validTime, "Valid time should pass through unchanged"); + } Review Comment: This test depends on the current system clock (and will eventually fail once the current date exceeds the allowed range). Use a fixed timestamp known to be within the valid range to keep the test deterministic and time-zone/clock independent. ########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/columntransformer/TimeValidationColumnTransformerTest.java: ########## @@ -0,0 +1,228 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.columntransformer; + +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.config.table.TableType; +import org.apache.pinot.spi.config.table.ingestion.IngestionConfig; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.Schema; +import org.apache.pinot.spi.utils.builder.TableConfigBuilder; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + + +/** + * Tests for TimeValidationColumnTransformer. + */ +public class TimeValidationColumnTransformerTest { + + @Test + public void testIsNoOpWhenNoTimeColumn() { + Schema schema = new Schema.SchemaBuilder() + .addSingleValueDimension("col1", FieldSpec.DataType.INT) + .build(); + + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName("testTable") + .build(); + + TimeValidationColumnTransformer transformer = new TimeValidationColumnTransformer(tableConfig, schema); + assertTrue(transformer.isNoOp(), "Should be no-op when no time column configured"); + } + + @Test + public void testIsNoOpWhenTimeValueCheckDisabled() { + Schema schema = new Schema.SchemaBuilder() + .addDateTime("timeCol", FieldSpec.DataType.LONG, "1:MILLISECONDS:EPOCH", "1:MILLISECONDS") + .build(); + + TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE) + .setTableName("testTable") + .setTimeColumnName("timeCol") + .build(); + // rowTimeValueCheck is false by default + + TimeValidationColumnTransformer transformer = new TimeValidationColumnTransformer(tableConfig, schema); + assertTrue(transformer.isNoOp(), "Should be no-op when time value check is disabled"); + } + + @Test + public void testIsNoOpWhenTimeValueCheckEnabled() { Review Comment: Test name is misleading: it says "IsNoOp" but the assertion expects the transformer to NOT be a no-op. Rename the test method (or flip the assertion) so the name matches the behavior being verified. ```suggestion public void testIsNotNoOpWhenTimeValueCheckEnabled() { ``` ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/columntransformer/SanitizationColumnTransformer.java: ########## @@ -0,0 +1,76 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.columntransformer; + +import javax.annotation.Nullable; +import org.apache.pinot.segment.local.utils.SanitizationTransformerUtils; +import org.apache.pinot.segment.local.utils.SanitizationTransformerUtils.SanitizationResult; +import org.apache.pinot.segment.local.utils.SanitizationTransformerUtils.SanitizedColumnInfo; +import org.apache.pinot.spi.columntransformer.ColumnTransformer; +import org.apache.pinot.spi.data.FieldSpec; + + +/** + * Column transformer that sanitizes STRING, JSON, and BYTES column values. + * + * <p>The sanitization rules include: + * <ul> + * <li>No {@code null} characters in string values</li> + * <li>String values are within the length limit</li> + * </ul> + * + * <p>Uses the MaxLengthExceedStrategy in the FieldSpec to decide what to do when the value exceeds the max: + * <ul> + * <li>TRIM_LENGTH: The value is trimmed to the max length</li> + * <li>SUBSTITUTE_DEFAULT_VALUE: The value is replaced with the default null value string</li> + * <li>ERROR: An exception is thrown</li> + * <li>NO_ACTION: The value is kept as is if no NULL_CHARACTER present, else trimmed till NULL</li> + * </ul> + * + * <p>NOTE: should put this after {@link DataTypeColumnTransformer} so that all values follow the data types in + * FieldSpec, and after {@link NullValueColumnTransformer} so that before sanitization, all values are non-null + * and follow the data types defined in the schema. + */ +public class SanitizationColumnTransformer implements ColumnTransformer { + + @Nullable + private final SanitizedColumnInfo _columnInfo; + /** + * Create a SanitizationColumnTransformer. + * + * @param fieldSpec The field specification for the column + */ + public SanitizationColumnTransformer(FieldSpec fieldSpec) { + _columnInfo = SanitizationTransformerUtils.getSanitizedColumnInfo(fieldSpec); + } + + @Override + public boolean isNoOp() { + return _columnInfo == null; + } + + @Override + public Object transform(@Nullable Object value) { + SanitizationResult result = SanitizationTransformerUtils.sanitizeValue(_columnInfo, value); + if (result != null) { + return result.getValue(); + } + return value; Review Comment: _columnInfo is `@Nullable` (and isNoOp() uses that), but transform() still calls SanitizationTransformerUtils.sanitizeValue(_columnInfo, value) without a null guard. If transform() is called on a no-op instance, this will throw a NullPointerException. Add a fast-path in transform() for _columnInfo == null (return value), or make sanitizeValue() accept a nullable columnInfo and treat null as "no sanitization". ########## pinot-segment-local/src/main/java/org/apache/pinot/segment/local/columntransformer/TimeValidationColumnTransformer.java: ########## @@ -0,0 +1,61 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.columntransformer; + +import javax.annotation.Nullable; +import org.apache.pinot.segment.local.utils.TimeValidationTransformerUtils; +import org.apache.pinot.segment.local.utils.TimeValidationTransformerUtils.TimeValidationConfig; +import org.apache.pinot.spi.columntransformer.ColumnTransformer; +import org.apache.pinot.spi.config.table.TableConfig; +import org.apache.pinot.spi.data.Schema; + + +/** + * Column transformer that validates time column values are within the valid range (1971-2071). + * Invalid values are transformed to null. + * + * <p>This transformer should be applied after {@link DataTypeColumnTransformer} so that all values + * follow the data types in FieldSpec, and before {@link NullValueColumnTransformer} so that + * invalidated values can be filled with defaults. + */ +public class TimeValidationColumnTransformer implements ColumnTransformer { + + @Nullable + private final TimeValidationConfig _config; + + /** + * Create a TimeValidationColumnTransformer. + * + * @param tableConfig The table configuration + * @param schema The schema + */ + public TimeValidationColumnTransformer(TableConfig tableConfig, Schema schema) { + _config = TimeValidationTransformerUtils.getConfig(tableConfig, schema); + } + + @Override + public boolean isNoOp() { + return _config == null; + } + + @Override + public Object transform(@Nullable Object value) { Review Comment: _config is marked @Nullable, but transform() unconditionally passes it to TimeValidationTransformerUtils.transformTimeValue(), which dereferences config. If transform() is ever called when isNoOp() is true, this will NPE. Consider guarding in transform() (return the input unchanged when _config is null) or changing transformTimeValue() to accept a nullable config and behave as identity when disabled. ```suggestion public Object transform(@Nullable Object value) { if (_config == null) { // When validation is disabled, behave as a no-op. return value; } ``` ########## pinot-segment-local/src/test/java/org/apache/pinot/segment/local/columntransformer/SanitizationColumnTransformerTest.java: ########## @@ -0,0 +1,226 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, + * software distributed under the License is distributed on an + * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY + * KIND, either express or implied. See the License for the + * specific language governing permissions and limitations + * under the License. + */ +package org.apache.pinot.segment.local.columntransformer; + +import org.apache.pinot.spi.data.DimensionFieldSpec; +import org.apache.pinot.spi.data.FieldSpec; +import org.apache.pinot.spi.data.FieldSpec.MaxLengthExceedStrategy; +import org.apache.pinot.spi.data.Schema; +import org.testng.annotations.Test; + +import static org.testng.Assert.assertEquals; +import static org.testng.Assert.assertFalse; +import static org.testng.Assert.assertNull; +import static org.testng.Assert.assertTrue; + + +/** + * Tests for SanitizationColumnTransformer. + */ +public class SanitizationColumnTransformerTest { + + @Test + public void testIsNoOpForInt() { + Schema schema = new Schema.SchemaBuilder() + .addSingleValueDimension("intCol", FieldSpec.DataType.INT) + .build(); + FieldSpec fieldSpec = schema.getFieldSpecFor("intCol"); + + SanitizationColumnTransformer transformer = new SanitizationColumnTransformer(fieldSpec); + assertTrue(transformer.isNoOp(), "INT column should be no-op"); + } + + @Test + public void testIsNotNoOpForString() { + Schema schema = new Schema.SchemaBuilder() + .addSingleValueDimension("stringCol", FieldSpec.DataType.STRING) + .build(); + FieldSpec fieldSpec = schema.getFieldSpecFor("stringCol"); + + SanitizationColumnTransformer transformer = new SanitizationColumnTransformer(fieldSpec); + assertFalse(transformer.isNoOp(), "STRING column should not be no-op"); + } + + @Test + public void testIsNotNoOpForJson() { + Schema schema = new Schema.SchemaBuilder() + .addSingleValueDimension("jsonCol", FieldSpec.DataType.JSON) + .build(); + FieldSpec fieldSpec = schema.getFieldSpecFor("jsonCol"); + + SanitizationColumnTransformer transformer = new SanitizationColumnTransformer(fieldSpec); + assertFalse(transformer.isNoOp(), "JSON column should not be no-op"); + } + + @Test + public void testIsNotNoOpForBytes() { + Schema schema = new Schema.SchemaBuilder() + .addSingleValueDimension("bytesCol", FieldSpec.DataType.BYTES) + .build(); + FieldSpec fieldSpec = schema.getFieldSpecFor("bytesCol"); + + SanitizationColumnTransformer transformer = new SanitizationColumnTransformer(fieldSpec); + // BYTES with NO_ACTION strategy should be no-op + // Default strategy depends on field spec configuration + assertTrue(transformer.isNoOp() || !transformer.isNoOp()); Review Comment: This assertion is a tautology and doesn't validate any behavior (it will always pass). Configure the BYTES FieldSpec so the expected max-length strategy is deterministic (e.g., set TRIM_LENGTH to assert not-no-op, or ensure NO_ACTION to assert no-op) and assert the specific expected outcome. ```suggestion DimensionFieldSpec bytesFieldSpec = new DimensionFieldSpec("bytesCol", FieldSpec.DataType.BYTES, true); // Configure a deterministic max-length exceed strategy so behavior is well-defined bytesFieldSpec.setMaxLengthExceedStrategy(MaxLengthExceedStrategy.TRIM_LENGTH); Schema schema = new Schema.SchemaBuilder() .addField(bytesFieldSpec) .build(); FieldSpec fieldSpec = schema.getFieldSpecFor("bytesCol"); SanitizationColumnTransformer transformer = new SanitizationColumnTransformer(fieldSpec); // BYTES with TRIM_LENGTH strategy should not be no-op assertFalse(transformer.isNoOp(), "BYTES column with TRIM_LENGTH strategy should not be no-op"); ``` -- This is an automated message from the Apache Git Service. To respond to the message, please log on to GitHub and use the URL above to go to the specific comment. To unsubscribe, e-mail: [email protected] For queries about this service, please contact Infrastructure at: [email protected] --------------------------------------------------------------------- To unsubscribe, e-mail: [email protected] For additional commands, e-mail: [email protected]
