This is an automated email from the ASF dual-hosted git repository.
siddteotia pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/incubator-pinot.git
The following commit(s) were added to refs/heads/master by this push:
new 20c17f6 Throw exception and give error message when creating schema
with spaces in column name (#6735)
20c17f6 is described below
commit 20c17f68e146945b69695cdd07feaab5b8a99a0b
Author: Liang Mingqiang <[email protected]>
AuthorDate: Wed Mar 31 19:27:01 2021 -0700
Throw exception and give error message when creating schema with spaces in
column name (#6735)
* Throw exception and give error message when creating schema with spaces
in column name
* add unit test
* Add more test cases, comments, etc
---
.../org/apache/pinot/core/util/SchemaUtils.java | 20 +++++----
.../apache/pinot/core/util/SchemaUtilsTest.java | 52 +++++++++++++++++++---
2 files changed, 57 insertions(+), 15 deletions(-)
diff --git
a/pinot-core/src/main/java/org/apache/pinot/core/util/SchemaUtils.java
b/pinot-core/src/main/java/org/apache/pinot/core/util/SchemaUtils.java
index 6462dbd..41fecc4 100644
--- a/pinot-core/src/main/java/org/apache/pinot/core/util/SchemaUtils.java
+++ b/pinot-core/src/main/java/org/apache/pinot/core/util/SchemaUtils.java
@@ -23,6 +23,7 @@ import java.util.Collections;
import java.util.HashSet;
import java.util.List;
import java.util.Set;
+import org.apache.commons.lang3.StringUtils;
import org.apache.pinot.core.data.function.FunctionEvaluator;
import org.apache.pinot.core.data.function.FunctionEvaluatorFactory;
import org.apache.pinot.spi.config.table.TableConfig;
@@ -61,14 +62,15 @@ public class SchemaUtils {
/**
* Validates the following:
- * 1) Checks valid transform function -
+ * 1) Column name should not contain blank space.
+ * 2) Checks valid transform function -
* for a field spec with transform function, the source column name and
destination column name are exclusive i.e. do not allow using source column
name for destination column
* ensure transform function string can be used to create a {@link
FunctionEvaluator}
- * 2) Checks for chained transforms/derived transform - not supported yet
- * TODO: Transform functions have moved to table config. Once we stop
supporting them in schema, remove the validations 1 and 2
- * 3) Checks valid timeFieldSpec - if incoming and outgoing granularity spec
are different a) the names cannot be same b) cannot use SIMPLE_DATE_FORMAT for
conversion
- * 4) Checks valid dateTimeFieldSpecs - checks format and granularity string
- * 5) Schema validations from {@link Schema#validate}
+ * 3) Checks for chained transforms/derived transform - not supported yet
+ * TODO: Transform functions have moved to table config. Once we stop
supporting them in schema, remove the validations 2 and 3
+ * 4) Checks valid timeFieldSpec - if incoming and outgoing granularity spec
are different a) the names cannot be same b) cannot use SIMPLE_DATE_FORMAT for
conversion
+ * 5) Checks valid dateTimeFieldSpecs - checks format and granularity string
+ * 6) Schema validations from {@link Schema#validate}
*/
public static void validate(Schema schema) {
schema.validate();
@@ -79,6 +81,8 @@ public class SchemaUtils {
for (FieldSpec fieldSpec : schema.getAllFieldSpecs()) {
if (!fieldSpec.isVirtualColumn()) {
String column = fieldSpec.getName();
+ Preconditions.checkState(!StringUtils.containsWhitespace(column),
+ "The column name \"%s\" should not contain blank space.", column);
primaryKeyColumnCandidates.add(column);
String transformFunction = fieldSpec.getTransformFunction();
if (transformFunction != null) {
@@ -108,8 +112,8 @@ public class SchemaUtils {
transformedColumns.retainAll(argumentColumns));
if (schema.getPrimaryKeyColumns() != null) {
for (String primaryKeyColumn : schema.getPrimaryKeyColumns()) {
-
Preconditions.checkState(primaryKeyColumnCandidates.contains(primaryKeyColumn),
- "The primary key column must exist");
+ Preconditions
+ .checkState(primaryKeyColumnCandidates.contains(primaryKeyColumn),
"The primary key column must exist");
}
}
}
diff --git
a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
index 19d7fab..1741b65 100644
--- a/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
+++ b/pinot-core/src/test/java/org/apache/pinot/core/util/SchemaUtilsTest.java
@@ -23,9 +23,9 @@ import java.io.IOException;
import java.util.ArrayList;
import java.util.List;
import java.util.concurrent.TimeUnit;
-import org.apache.pinot.spi.config.table.ingestion.IngestionConfig;
import org.apache.pinot.spi.config.table.TableConfig;
import org.apache.pinot.spi.config.table.TableType;
+import org.apache.pinot.spi.config.table.ingestion.IngestionConfig;
import org.apache.pinot.spi.config.table.ingestion.TransformConfig;
import org.apache.pinot.spi.data.DateTimeFieldSpec;
import org.apache.pinot.spi.data.DimensionFieldSpec;
@@ -90,7 +90,8 @@ public class SchemaUtilsTest {
// schema doesn't have destination columns from transformConfigs
schema = new Schema.SchemaBuilder().setSchemaName(TABLE_NAME).build();
tableConfig = new
TableConfigBuilder(TableType.OFFLINE).setTableName(TABLE_NAME).setIngestionConfig(
- new IngestionConfig(null, null, null, Lists.newArrayList(new
TransformConfig("colA", "round(colB, 1000)")))).build();
+ new IngestionConfig(null, null, null, Lists.newArrayList(new
TransformConfig("colA", "round(colB, 1000)"))))
+ .build();
try {
SchemaUtils.validate(schema, Lists.newArrayList(tableConfig));
Assert.fail("Should fail schema validation, as colA is not present in
schema");
@@ -138,7 +139,8 @@ public class SchemaUtilsTest {
.addDateTime(TIME_COLUMN, DataType.LONG, "1:MILLISECONDS:EPOCH",
"1:HOURS").build();
tableConfig = new
TableConfigBuilder(TableType.REALTIME).setTableName(TABLE_NAME).setTimeColumnName(TIME_COLUMN)
.setIngestionConfig(
- new IngestionConfig(null, null, null, Lists.newArrayList(new
TransformConfig("colA", "round(colB, 1000)")))).build();
+ new IngestionConfig(null, null, null, Lists.newArrayList(new
TransformConfig("colA", "round(colB, 1000)"))))
+ .build();
try {
SchemaUtils.validate(schema, Lists.newArrayList(tableConfig));
Assert.fail("Should fail schema validation, as colA is not present in
schema");
@@ -227,15 +229,15 @@ public class SchemaUtilsTest {
// non-existing column used as primary key
pinotSchema = new Schema.SchemaBuilder()
.addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS,
"incoming"),
- new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS,
"outgoing")).addSingleValueDimension("col", DataType.INT)
- .setPrimaryKeyColumns(Lists.newArrayList("test")).build();
+ new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing"))
+ .addSingleValueDimension("col",
DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("test")).build();
checkValidationFails(pinotSchema);
// valid primary key
pinotSchema = new Schema.SchemaBuilder()
.addTime(new TimeGranularitySpec(DataType.LONG, TimeUnit.MILLISECONDS,
"incoming"),
- new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS,
"outgoing")).addSingleValueDimension("col", DataType.INT)
- .setPrimaryKeyColumns(Lists.newArrayList("col")).build();
+ new TimeGranularitySpec(DataType.INT, TimeUnit.DAYS, "outgoing"))
+ .addSingleValueDimension("col",
DataType.INT).setPrimaryKeyColumns(Lists.newArrayList("col")).build();
SchemaUtils.validate(pinotSchema);
}
@@ -313,6 +315,42 @@ public class SchemaUtilsTest {
SchemaUtils.validate(pinotSchema);
}
+ /**
+ * Testcases for testing column name validation logic.
+ * Currently column name validation only checks no blank space in column
names. Should we add more validation on column
+ * names later on, we can corresponding tests here.
+ */
+ @Test
+ public void testColumnNameValidation()
+ throws IOException {
+ Schema pinotSchema;
+ // A schema all column names does not contains blank space should pass the
validation.
+ pinotSchema = Schema.fromString(
+ "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[
{\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ +
"\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+ SchemaUtils.validate(pinotSchema);
+ // Validation will fail if dimensionFieldSpecs column name contain blank
space.
+ pinotSchema = Schema.fromString(
+ "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[
{\"name\":\"dim 1\",\"dataType\":\"STRING\"}],"
+ +
"\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+ checkValidationFails(pinotSchema);
+ // Validation will fail if dateTimeFieldSpecs column name contain blank
space.
+ pinotSchema = Schema.fromString(
+ "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[
{\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ + "\"dateTimeFieldSpecs\":[{\"name\":\"dt
1\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+ checkValidationFails(pinotSchema);
+ // Test case for column name has leading blank space.
+ pinotSchema = Schema.fromString(
+ "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[
{\"name\":\" dim1\",\"dataType\":\"STRING\"}],"
+ +
"\"dateTimeFieldSpecs\":[{\"name\":\"dt1\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+ checkValidationFails(pinotSchema);
+ // Test case for column name has trailing blank space.
+ pinotSchema = Schema.fromString(
+ "{\"schemaName\":\"testSchema\"," + "\"dimensionFieldSpecs\":[
{\"name\":\"dim1\",\"dataType\":\"STRING\"}],"
+ + "\"dateTimeFieldSpecs\":[{\"name\":\"dt1
\",\"dataType\":\"INT\",\"format\":\"1:DAYS:SIMPLE_DATE_FORMAT:yyyyMMdd\",\"granularity\":\"1:DAYS\"}]}");
+ checkValidationFails(pinotSchema);
+ }
+
private void checkValidationFails(Schema pinotSchema) {
try {
SchemaUtils.validate(pinotSchema);
---------------------------------------------------------------------
To unsubscribe, e-mail: [email protected]
For additional commands, e-mail: [email protected]