This is an automated email from the ASF dual-hosted git repository.

siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git


The following commit(s) were added to refs/heads/master by this push:
     new 20c17f6  Throw exception and give error message when creating schema 
with spaces in column name (#6735)
20c17f6 is described below

commit 20c17f68e146945b69695cdd07feaab5b8a99a0b
Author: Liang Mingqiang <[email protected]>
AuthorDate: Wed Mar 31 19:27:01 2021 -0700

    Throw exception and give error message when creating schema with spaces in 
column name (#6735)
    
    * Throw exception and give error message when creating schema with spaces 
in column name
    
    * add unit test
    
    * Add more test cases, comments, etc
---
 .../org/apache/pinot/core/util/SchemaUtils.java    | 20 +++++----
 .../apache/pinot/core/util/SchemaUtilsTest.java    | 52 +++++++++++++++++++---
 2 files changed, 57 insertions(+), 15 deletions(-)

diff --git 
a/pinot-core/src/main/java/org/apache/pinot/core/util/SchemaUtils.java 
b/pinot-core/src/main/java/org/apache/pinot/core/util/SchemaUtils.java
index 6462dbd..41fecc4 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/util/SchemaUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/util/SchemaUtils.java
@@ -23,6 +23,7 @@ import java.util.Collections;
 import java.util.HashSet;
 import java.util.List;
 import java.util.Set;
+import org.apache.commons.lang3.StringUtils;
 import org.apache.pinot.core.data.function.FunctionEvaluator;
 import org.apache.pinot.core.data.function.FunctionEvaluatorFactory;
 import org.apache.pinot.spi.config.table.TableConfig;
@@ -61,14 +62,15 @@ public class SchemaUtils {
 
   /**
    * Validates the following:
-   * 1) Checks valid transform function -
+   * 1) Column name should not contain blank space.
+   * 2) Checks valid transform function -
    *   for a field spec with transform function, the source column name and 
destination column name are exclusive i.e. do not allow using source column 
name for destination column
    *   ensure transform function string can be used to create a {@link 
FunctionEvaluator}
-   * 2) Checks for chained transforms/derived transform - not supported yet
-   * TODO: Transform functions have moved to table config. Once we stop 
supporting them in schema, remove the validations 1 and 2
-   * 3) Checks valid timeFieldSpec - if incoming and outgoing granularity spec 
are different a) the names cannot be same b) cannot use SIMPLE_DATE_FORMAT for 
conversion
-   * 4) Checks valid dateTimeFieldSpecs - checks format and granularity string
-   * 5) Schema validations from {@link Schema#validate}
+   * 3) Checks for chained transforms/derived transform - not supported yet
+   * TODO: Transform functions have moved to table config. Once we stop 
supporting them in schema, remove the validations 2 and 3
+   * 4) Checks valid timeFieldSpec - if incoming and outgoing granularity spec 
are different a) the names cannot be same b) cannot use SIMPLE_DATE_FORMAT for 
conversion
+   * 5) Checks valid dateTimeFieldSpecs - checks format and granularity string
+   * 6) Schema validations from {@link Schema#validate}
    */
   public static void validate(Schema schema) {
     schema.validate();
@@ -79,6 +81,8 @@ public class SchemaUtils {
     for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
       if (!fieldSpec.isVirtualColumn()) {
         String column = fieldSpec.getName();
+        Preconditions.checkState(!StringUtils.containsWhitespace(column),
+            "The column name \"%s\" should not contain blank space.", column);
         primaryKeyColumnCandidates.add(column);
         String transformFunction = fieldSpec.getTransformFunction();
         if (transformFunction != null) {
@@ -108,8 +112,8 @@ public class SchemaUtils {
         transformedColumns.retainAll(argumentColumns));
     if (schema.getPrimaryKeyColumns() != null) {
       for (String primaryKeyColumn : schema.getPrimaryKeyColumns()) {
-        
Preconditions.checkState(primaryKeyColumnCandidates.contains(primaryKeyColumn),
-            "The primary key column must exist");
+        Preconditions
+            .checkState(primaryKeyColumnCandidates.contains(primaryKeyColumn), 
"The primary key column must exist");
       }
     }
   }
diff --git 
a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java 
b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
index 19d7fab..1741b65 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
@@ -23,9 +23,9 @@ import java.io.IOException;
 import java.util.ArrayList;
 import java.util.List;
 import java.util.concurrent.TimeUnit;
-import org.apache.pinot.spi.config.table.ingestion.IngestionConfig;
 import org.apache.pinot.spi.config.table.TableConfig;
 import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.config.table.ingestion.IngestionConfig;
 import org.apache.pinot.spi.config.table.ingestion.TransformConfig;
 import org.apache.pinot.spi.data.DateTimeFieldSpec;
 import org.apache.pinot.spi.data.DimensionFieldSpec;
@@ -90,7 +90,8 @@ public class SchemaUtilsTest {
     // schema doesn't have destination columns from transformConfigs
     schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).build();
     tableConfig = new 
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setIngestionConfig(
-        new IngestionConfig(null, null, null, Lists.newArrayList(new 
TransformConfig("colA", "round(colB, 1000)")))).build();
+        new IngestionConfig(null, null, null, Lists.newArrayList(new 
TransformConfig("colA", "round(colB, 1000)"))))
+        .build();
     try {
       SchemaUtils.validate(schema, Lists.newArrayList(tableConfig));
       Assert.fail("Should fail schema validation, as colA is not present in 
schema");
@@ -138,7 +139,8 @@ public class SchemaUtilsTest {
         .addDateTime(TIME_COLUMN, DataType.LONG, "1:MILLISECONDS:EPOCH", 
"1:HOURS").build();
     tableConfig = new 
TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).setTimeColumnName(TIME_COLUMN)
         .setIngestionConfig(
-            new IngestionConfig(null, null, null, Lists.newArrayList(new 
TransformConfig("colA", "round(colB, 1000)")))).build();
+            new IngestionConfig(null, null, null, Lists.newArrayList(new 
TransformConfig("colA", "round(colB, 1000)"))))
+        .build();
     try {
       SchemaUtils.validate(schema, Lists.newArrayList(tableConfig));
       Assert.fail("Should fail schema validation, as colA is not present in 
schema");
@@ -227,15 +229,15 @@ public class SchemaUtilsTest {
     // non-existing column used as primary key
     pinotSchema = new Schema.SchemaBuilder()
         .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, 
"incoming"),
-            new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, 
"outgoing")).addSingleValueDimension("col", DataType.INT)
-        .setPrimaryKeyColumns(Lists.newArrayList("test")).build();
+            new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing"))
+        .addSingleValueDimension("col", 
DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("test")).build();
     checkValidationFails(pinotSchema);
 
     // valid primary key
     pinotSchema = new Schema.SchemaBuilder()
         .addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS, 
"incoming"),
-            new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, 
"outgoing")).addSingleValueDimension("col", DataType.INT)
-        .setPrimaryKeyColumns(Lists.newArrayList("col")).build();
+            new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing"))
+        .addSingleValueDimension("col", 
DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("col")).build();
     SchemaUtils.validate(pinotSchema);
   }
 
@@ -313,6 +315,42 @@ public class SchemaUtilsTest {
     SchemaUtils.validate(pinotSchema);
   }
 
+  /**
+   * Testcases for testing column name validation logic.
+   * Currently column name validation only checks no blank space in column 
names. Should we add more validation on column
+   * names later on, we can corresponding tests here.
+   */
+  @Test
+  public void testColumnNameValidation()
+      throws IOException {
+    Schema pinotSchema;
+    // A schema all column names does not contains blank space should pass the 
validation.
+    pinotSchema = Schema.fromString(
+        "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ 
{\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+            + 
"\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+    SchemaUtils.validate(pinotSchema);
+    // Validation will fail if dimensionFieldSpecs column name contain blank 
space.
+    pinotSchema = Schema.fromString(
+        "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ 
{\"name\":\"dim 1\",\"dataType\":\"STRING\"}],"
+            + 
"\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+    checkValidationFails(pinotSchema);
+    // Validation will fail if dateTimeFieldSpecs column name contain blank 
space.
+    pinotSchema = Schema.fromString(
+        "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ 
{\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+            + "\"dateTimeFieldSpecs\":[{\"name\":\"dt 
1\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+    checkValidationFails(pinotSchema);
+    // Test case for column name has leading blank space.
+    pinotSchema = Schema.fromString(
+        "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ 
{\"name\":\" dim1\",\"dataType\":\"STRING\"}],"
+            + 
"\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+    checkValidationFails(pinotSchema);
+    // Test case for column name has trailing blank space.
+    pinotSchema = Schema.fromString(
+        "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[ 
{\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+            + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1  
\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+    checkValidationFails(pinotSchema);
+  }
+
   private void checkValidationFails(Schema pinotSchema) {
     try {
       SchemaUtils.validate(pinotSchema);

---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]

Reply via email to