Copilot commented on code in PR #17740:
URL: https://github.com/apache/pinot/pull/17740#discussion_r2841431301


##########
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/columntransformer/TimeValidationColumnTransformerTest.java:
##########
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.columntransformer;
+
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.config.table.ingestion.IngestionConfig;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+
+/**
+ * Tests for TimeValidationColumnTransformer.
+ */
+public class TimeValidationColumnTransformerTest {
+
+  @Test
+  public void testIsNoOpWhenNoTimeColumn() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addSingleValueDimension("col1", FieldSpec.DataType.INT)
+        .build();
+
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName("testTable")
+        .build();
+
+    TimeValidationColumnTransformer transformer = new 
TimeValidationColumnTransformer(tableConfig, schema);
+    assertTrue(transformer.isNoOp(), "Should be no-op when no time column 
configured");
+  }
+
+  @Test
+  public void testIsNoOpWhenTimeValueCheckDisabled() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addDateTime("timeCol", FieldSpec.DataType.LONG, 
"1:MILLISECONDS:EPOCH", "1:MILLISECONDS")
+        .build();
+
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName("testTable")
+        .setTimeColumnName("timeCol")
+        .build();
+    // rowTimeValueCheck is false by default
+
+    TimeValidationColumnTransformer transformer = new 
TimeValidationColumnTransformer(tableConfig, schema);
+    assertTrue(transformer.isNoOp(), "Should be no-op when time value check is 
disabled");
+  }
+
+  @Test
+  public void testIsNoOpWhenTimeValueCheckEnabled() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addDateTime("timeCol", FieldSpec.DataType.LONG, 
"1:MILLISECONDS:EPOCH", "1:MILLISECONDS")
+        .build();
+
+    IngestionConfig ingestionConfig = new IngestionConfig();
+    ingestionConfig.setRowTimeValueCheck(true);
+
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName("testTable")
+        .setTimeColumnName("timeCol")
+        .setIngestionConfig(ingestionConfig)
+        .build();
+
+    TimeValidationColumnTransformer transformer = new 
TimeValidationColumnTransformer(tableConfig, schema);
+    assertTrue(!transformer.isNoOp(), "Should not be no-op when time value 
check is enabled");
+  }
+
+  @Test
+  public void testTransformValidTime() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addDateTime("timeCol", FieldSpec.DataType.LONG, 
"1:MILLISECONDS:EPOCH", "1:MILLISECONDS")
+        .build();
+
+    IngestionConfig ingestionConfig = new IngestionConfig();
+    ingestionConfig.setRowTimeValueCheck(true);
+
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName("testTable")
+        .setTimeColumnName("timeCol")
+        .setIngestionConfig(ingestionConfig)
+        .build();
+
+    TimeValidationColumnTransformer transformer = new 
TimeValidationColumnTransformer(tableConfig, schema);
+
+    // Valid time: current time
+    long validTime = System.currentTimeMillis();
+    Object result = transformer.transform(validTime);
+
+    assertEquals(result, validTime, "Valid time should pass through 
unchanged");
+  }

Review Comment:
   This test depends on the current system clock (and will eventually fail once 
the current date exceeds the allowed range). Use a fixed timestamp known to be 
within the valid range to keep the test deterministic and time-zone/clock 
independent.



##########
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/columntransformer/TimeValidationColumnTransformerTest.java:
##########
@@ -0,0 +1,228 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.columntransformer;
+
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.config.table.ingestion.IngestionConfig;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.Schema;
+import org.apache.pinot.spi.utils.builder.TableConfigBuilder;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+
+/**
+ * Tests for TimeValidationColumnTransformer.
+ */
+public class TimeValidationColumnTransformerTest {
+
+  @Test
+  public void testIsNoOpWhenNoTimeColumn() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addSingleValueDimension("col1", FieldSpec.DataType.INT)
+        .build();
+
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName("testTable")
+        .build();
+
+    TimeValidationColumnTransformer transformer = new 
TimeValidationColumnTransformer(tableConfig, schema);
+    assertTrue(transformer.isNoOp(), "Should be no-op when no time column 
configured");
+  }
+
+  @Test
+  public void testIsNoOpWhenTimeValueCheckDisabled() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addDateTime("timeCol", FieldSpec.DataType.LONG, 
"1:MILLISECONDS:EPOCH", "1:MILLISECONDS")
+        .build();
+
+    TableConfig tableConfig = new TableConfigBuilder(TableType.OFFLINE)
+        .setTableName("testTable")
+        .setTimeColumnName("timeCol")
+        .build();
+    // rowTimeValueCheck is false by default
+
+    TimeValidationColumnTransformer transformer = new 
TimeValidationColumnTransformer(tableConfig, schema);
+    assertTrue(transformer.isNoOp(), "Should be no-op when time value check is 
disabled");
+  }
+
+  @Test
+  public void testIsNoOpWhenTimeValueCheckEnabled() {

Review Comment:
   Test name is misleading: it says "IsNoOp" but the assertion expects the 
transformer to NOT be a no-op. Rename the test method (or flip the assertion) 
so the name matches the behavior being verified.
   ```suggestion
     public void testIsNotNoOpWhenTimeValueCheckEnabled() {
   ```



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/columntransformer/SanitizationColumnTransformer.java:
##########
@@ -0,0 +1,76 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.columntransformer;
+
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.utils.SanitizationTransformerUtils;
+import 
org.apache.pinot.segment.local.utils.SanitizationTransformerUtils.SanitizationResult;
+import 
org.apache.pinot.segment.local.utils.SanitizationTransformerUtils.SanitizedColumnInfo;
+import org.apache.pinot.spi.columntransformer.ColumnTransformer;
+import org.apache.pinot.spi.data.FieldSpec;
+
+
+/**
+ * Column transformer that sanitizes STRING, JSON, and BYTES column values.
+ *
+ * <p>The sanitization rules include:
+ * <ul>
+ *   <li>No {@code null} characters in string values</li>
+ *   <li>String values are within the length limit</li>
+ * </ul>
+ *
+ * <p>Uses the MaxLengthExceedStrategy in the FieldSpec to decide what to do 
when the value exceeds the max:
+ * <ul>
+ *   <li>TRIM_LENGTH: The value is trimmed to the max length</li>
+ *   <li>SUBSTITUTE_DEFAULT_VALUE: The value is replaced with the default null 
value string</li>
+ *   <li>ERROR: An exception is thrown</li>
+ *   <li>NO_ACTION: The value is kept as is if no NULL_CHARACTER present, else 
trimmed till NULL</li>
+ * </ul>
+ *
+ * <p>NOTE: should put this after {@link DataTypeColumnTransformer} so that 
all values follow the data types in
+ * FieldSpec, and after {@link NullValueColumnTransformer} so that before 
sanitization, all values are non-null
+ * and follow the data types defined in the schema.
+ */
+public class SanitizationColumnTransformer implements ColumnTransformer {
+
+  @Nullable
+  private final SanitizedColumnInfo _columnInfo;
+  /**
+   * Create a SanitizationColumnTransformer.
+   *
+   * @param fieldSpec The field specification for the column
+   */
+  public SanitizationColumnTransformer(FieldSpec fieldSpec) {
+    _columnInfo = 
SanitizationTransformerUtils.getSanitizedColumnInfo(fieldSpec);
+  }
+
+  @Override
+  public boolean isNoOp() {
+    return _columnInfo == null;
+  }
+
+  @Override
+  public Object transform(@Nullable Object value) {
+    SanitizationResult result = 
SanitizationTransformerUtils.sanitizeValue(_columnInfo, value);
+    if (result != null) {
+      return result.getValue();
+    }
+    return value;

Review Comment:
   _columnInfo is `@Nullable` (and isNoOp() uses that), but transform() still 
calls SanitizationTransformerUtils.sanitizeValue(_columnInfo, value) without a 
null guard. If transform() is called on a no-op instance, this will throw a 
NullPointerException. Add a fast-path in transform() for _columnInfo == null 
(return value), or make sanitizeValue() accept a nullable columnInfo and treat 
null as "no sanitization".



##########
pinot-segment-local/src/main/java/org/apache/pinot/segment/local/columntransformer/TimeValidationColumnTransformer.java:
##########
@@ -0,0 +1,61 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.columntransformer;
+
+import javax.annotation.Nullable;
+import org.apache.pinot.segment.local.utils.TimeValidationTransformerUtils;
+import 
org.apache.pinot.segment.local.utils.TimeValidationTransformerUtils.TimeValidationConfig;
+import org.apache.pinot.spi.columntransformer.ColumnTransformer;
+import org.apache.pinot.spi.config.table.TableConfig;
+import org.apache.pinot.spi.data.Schema;
+
+
+/**
+ * Column transformer that validates time column values are within the valid 
range (1971-2071).
+ * Invalid values are transformed to null.
+ *
+ * <p>This transformer should be applied after {@link 
DataTypeColumnTransformer} so that all values
+ * follow the data types in FieldSpec, and before {@link 
NullValueColumnTransformer} so that
+ * invalidated values can be filled with defaults.
+ */
+public class TimeValidationColumnTransformer implements ColumnTransformer {
+
+  @Nullable
+  private final TimeValidationConfig _config;
+
+  /**
+   * Create a TimeValidationColumnTransformer.
+   *
+   * @param tableConfig The table configuration
+   * @param schema The schema
+   */
+  public TimeValidationColumnTransformer(TableConfig tableConfig, Schema 
schema) {
+    _config = TimeValidationTransformerUtils.getConfig(tableConfig, schema);
+  }
+
+  @Override
+  public boolean isNoOp() {
+    return _config == null;
+  }
+
+  @Override
+  public Object transform(@Nullable Object value) {

Review Comment:
   _config is marked @Nullable, but transform() unconditionally passes it to 
TimeValidationTransformerUtils.transformTimeValue(), which dereferences config. 
If transform() is ever called when isNoOp() is true, this will NPE. Consider 
guarding in transform() (return the input unchanged when _config is null) or 
changing transformTimeValue() to accept a nullable config and behave as 
identity when disabled.
   ```suggestion
     public Object transform(@Nullable Object value) {
       if (_config == null) {
         // When validation is disabled, behave as a no-op.
         return value;
       }
   ```



##########
pinot-segment-local/src/test/java/org/apache/pinot/segment/local/columntransformer/SanitizationColumnTransformerTest.java:
##########
@@ -0,0 +1,226 @@
+/**
+ * Licensed to the Apache Software Foundation (ASF) under one
+ * or more contributor license agreements.  See the NOTICE file
+ * distributed with this work for additional information
+ * regarding copyright ownership.  The ASF licenses this file
+ * to you under the Apache License, Version 2.0 (the
+ * "License"); you may not use this file except in compliance
+ * with the License.  You may obtain a copy of the License at
+ *
+ *   http://www.apache.org/licenses/LICENSE-2.0
+ *
+ * Unless required by applicable law or agreed to in writing,
+ * software distributed under the License is distributed on an
+ * "AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+ * KIND, either express or implied.  See the License for the
+ * specific language governing permissions and limitations
+ * under the License.
+ */
+package org.apache.pinot.segment.local.columntransformer;
+
+import org.apache.pinot.spi.data.DimensionFieldSpec;
+import org.apache.pinot.spi.data.FieldSpec;
+import org.apache.pinot.spi.data.FieldSpec.MaxLengthExceedStrategy;
+import org.apache.pinot.spi.data.Schema;
+import org.testng.annotations.Test;
+
+import static org.testng.Assert.assertEquals;
+import static org.testng.Assert.assertFalse;
+import static org.testng.Assert.assertNull;
+import static org.testng.Assert.assertTrue;
+
+
+/**
+ * Tests for SanitizationColumnTransformer.
+ */
+public class SanitizationColumnTransformerTest {
+
+  @Test
+  public void testIsNoOpForInt() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addSingleValueDimension("intCol", FieldSpec.DataType.INT)
+        .build();
+    FieldSpec fieldSpec = schema.getFieldSpecFor("intCol");
+
+    SanitizationColumnTransformer transformer = new 
SanitizationColumnTransformer(fieldSpec);
+    assertTrue(transformer.isNoOp(), "INT column should be no-op");
+  }
+
+  @Test
+  public void testIsNotNoOpForString() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addSingleValueDimension("stringCol", FieldSpec.DataType.STRING)
+        .build();
+    FieldSpec fieldSpec = schema.getFieldSpecFor("stringCol");
+
+    SanitizationColumnTransformer transformer = new 
SanitizationColumnTransformer(fieldSpec);
+    assertFalse(transformer.isNoOp(), "STRING column should not be no-op");
+  }
+
+  @Test
+  public void testIsNotNoOpForJson() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addSingleValueDimension("jsonCol", FieldSpec.DataType.JSON)
+        .build();
+    FieldSpec fieldSpec = schema.getFieldSpecFor("jsonCol");
+
+    SanitizationColumnTransformer transformer = new 
SanitizationColumnTransformer(fieldSpec);
+    assertFalse(transformer.isNoOp(), "JSON column should not be no-op");
+  }
+
+  @Test
+  public void testIsNotNoOpForBytes() {
+    Schema schema = new Schema.SchemaBuilder()
+        .addSingleValueDimension("bytesCol", FieldSpec.DataType.BYTES)
+        .build();
+    FieldSpec fieldSpec = schema.getFieldSpecFor("bytesCol");
+
+    SanitizationColumnTransformer transformer = new 
SanitizationColumnTransformer(fieldSpec);
+    // BYTES with NO_ACTION strategy should be no-op
+    // Default strategy depends on field spec configuration
+    assertTrue(transformer.isNoOp() || !transformer.isNoOp());

Review Comment:
   This assertion is a tautology and doesn't validate any behavior (it will 
always pass). Configure the BYTES FieldSpec so the expected max-length strategy 
is deterministic (e.g., set TRIM_LENGTH to assert not-no-op, or ensure 
NO_ACTION to assert no-op) and assert the specific expected outcome.
   ```suggestion
       DimensionFieldSpec bytesFieldSpec = new DimensionFieldSpec("bytesCol", 
FieldSpec.DataType.BYTES, true);
       // Configure a deterministic max-length exceed strategy so behavior is 
well-defined
       
bytesFieldSpec.setMaxLengthExceedStrategy(MaxLengthExceedStrategy.TRIM_LENGTH);
       Schema schema = new Schema.SchemaBuilder()
           .addField(bytesFieldSpec)
           .build();
       FieldSpec fieldSpec = schema.getFieldSpecFor("bytesCol");
   
       SanitizationColumnTransformer transformer = new 
SanitizationColumnTransformer(fieldSpec);
       // BYTES with TRIM_LENGTH strategy should not be no-op
       assertFalse(transformer.isNoOp(), "BYTES column with TRIM_LENGTH 
strategy should not be no-op");
   ```



-- 
This is an automated message from the Apache Git Service.
To respond to the message, please log on to GitHub and use the
URL above to go to the specific comment.

To unsubscribe, e-mail: [email protected]

For queries about this service, please contact Infrastructure at:
[email protected]


---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to