Repository: drill Updated Branches: refs/heads/master ac823fe8b -> d43324f89
DRILL-2598: Throw validation error if CREATE VIEW/CTAS contains duplicate columns in definition Project: http://git-wip-us.apache.org/repos/asf/drill/repo Commit: http://git-wip-us.apache.org/repos/asf/drill/commit/ed02612a Tree: http://git-wip-us.apache.org/repos/asf/drill/tree/ed02612a Diff: http://git-wip-us.apache.org/repos/asf/drill/diff/ed02612a Branch: refs/heads/master Commit: ed02612a1b4eb419315791f87ab889e3765c1ceb Parents: 703314b Author: vkorukanti <[email protected]> Authored: Mon May 4 22:26:41 2015 -0700 Committer: vkorukanti <[email protected]> Committed: Tue May 5 11:03:58 2015 -0700 ---------------------------------------------------------------------- .../planner/sql/handlers/SqlHandlerUtil.java | 38 +++++- .../org/apache/drill/exec/sql/TestCTAS.java | 95 +++++++++++++ .../apache/drill/exec/sql/TestViewSupport.java | 132 +++++++++++++++++-- 3 files changed, 250 insertions(+), 15 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/drill/blob/ed02612a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/SqlHandlerUtil.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/SqlHandlerUtil.java b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/SqlHandlerUtil.java index 50af972..7ae5e0d 100644 --- a/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/SqlHandlerUtil.java +++ b/exec/java-exec/src/main/java/org/apache/drill/exec/planner/sql/handlers/SqlHandlerUtil.java @@ -17,7 +17,9 @@ */ package org.apache.drill.exec.planner.sql.handlers; +import com.google.common.collect.Sets; import org.apache.calcite.schema.Table; +import org.apache.calcite.sql.TypedSqlNode; import org.apache.calcite.tools.Planner; import org.apache.calcite.tools.RelConversionException; import org.apache.drill.common.exceptions.DrillException; @@ -33,6 +35,7 @@ import org.apache.calcite.rel.type.RelDataType; import org.apache.calcite.sql.SqlNode; import org.apache.drill.exec.store.ischema.Records; +import java.util.HashSet; import java.util.List; public class SqlHandlerUtil { @@ -55,12 +58,23 @@ public class SqlHandlerUtil { public static RelNode resolveNewTableRel(boolean isNewTableView, Planner planner, List<String> tableFieldNames, SqlNode newTableQueryDef) throws ValidationException, RelConversionException { - SqlNode validatedQuery = planner.validate(newTableQueryDef); - RelNode validatedQueryRelNode = planner.convert(validatedQuery); - if (tableFieldNames.size() > 0) { - final RelDataType queryRowType = validatedQueryRelNode.getRowType(); + TypedSqlNode validatedSqlNodeWithType = planner.validateAndGetType(newTableQueryDef); + + // Get the row type of view definition query. + // Reason for getting the row type from validated SqlNode than RelNode is because SqlNode -> RelNode involves + // renaming duplicate fields which is not desired when creating a view or table. + // For ex: SELECT region_id, region_id FROM cp.`region.json` LIMIT 1 returns + // +------------+------------+ + // | region_id | region_id0 | + // +------------+------------+ + // | 0 | 0 | + // +------------+------------+ + // which is not desired when creating new views or tables. + final RelDataType queryRowType = validatedSqlNodeWithType.getType(); + final RelNode validatedQueryRelNode = planner.convert(validatedSqlNodeWithType.getSqlNode()); + if (tableFieldNames.size() > 0) { // Field count should match. if (tableFieldNames.size() != queryRowType.getFieldCount()) { final String tblType = isNewTableView ? "view" : "table"; @@ -78,6 +92,9 @@ public class SqlHandlerUtil { } } + // validate the given field names to make sure there are no duplicates + ensureNoDuplicateColumnNames(tableFieldNames); + // CTAS statement has table field list (ex. below), add a project rel to rename the query fields. // Ex. CREATE TABLE tblname(col1, medianOfCol2, avgOfCol3) AS // SELECT col1, median(col2), avg(col3) FROM sourcetbl GROUP BY col1 ; @@ -86,9 +103,22 @@ public class SqlHandlerUtil { return DrillRelOptUtil.createRename(validatedQueryRelNode, tableFieldNames); } + // As the column names of the view are derived from SELECT query, make sure the query has no duplicate column names + ensureNoDuplicateColumnNames(queryRowType.getFieldNames()); + return validatedQueryRelNode; } + private static void ensureNoDuplicateColumnNames(List<String> fieldNames) throws ValidationException { + final HashSet<String> fieldHashSet = Sets.newHashSetWithExpectedSize(fieldNames.size()); + for(String field : fieldNames) { + if (fieldHashSet.contains(field.toLowerCase())) { + throw new ValidationException(String.format("Duplicate column name [%s]", field)); + } + fieldHashSet.add(field.toLowerCase()); + } + } + public static Table getTableFromSchema(AbstractSchema drillSchema, String tblName) throws DrillException { try { return drillSchema.getTable(tblName); http://git-wip-us.apache.org/repos/asf/drill/blob/ed02612a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestCTAS.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestCTAS.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestCTAS.java new file mode 100644 index 0000000..5fff956 --- /dev/null +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestCTAS.java @@ -0,0 +1,95 @@ +/** + * Licensed to the Apache Software Foundation (ASF) under one + * or more contributor license agreements. See the NOTICE file + * distributed with this work for additional information + * regarding copyright ownership. The ASF licenses this file + * to you under the Apache License, Version 2.0 (the + * "License"); you may not use this file except in compliance + * with the License. You may obtain a copy of the License at + * + * http://www.apache.org/licenses/LICENSE-2.0 + * + * Unless required by applicable law or agreed to in writing, software + * distributed under the License is distributed on an "AS IS" BASIS, + * WITHOUT WARRANTIES OR CONDITIONS OF ANY KIND, either express or implied. + * See the License for the specific language governing permissions and + * limitations under the License. + */ +package org.apache.drill.exec.sql; + +import com.google.common.collect.ImmutableList; +import org.apache.drill.BaseTestQuery; +import org.junit.Test; + +public class TestCTAS extends BaseTestQuery { + @Test // DRILL-2589 + public void withDuplicateColumnsInDef1() throws Exception { + ctasErrorTestHelper("CREATE TABLE %s.%s AS SELECT region_id, region_id FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "region_id") + ); + } + + @Test // DRILL-2589 + public void withDuplicateColumnsInDef2() throws Exception { + ctasErrorTestHelper("CREATE TABLE %s.%s AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "sales_city") + ); + } + + @Test // DRILL-2589 + public void withDuplicateColumnsInDef3() throws Exception { + ctasErrorTestHelper( + "CREATE TABLE %s.%s(regionid, regionid) " + + "AS SELECT region_id, sales_city FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "regionid") + ); + } + + @Test // DRILL-2589 + public void withDuplicateColumnsInDef4() throws Exception { + ctasErrorTestHelper( + "CREATE TABLE %s.%s(regionid, salescity, salescity) " + + "AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "salescity") + ); + } + + @Test // DRILL-2589 + public void withDuplicateColumnsInDef5() throws Exception { + ctasErrorTestHelper( + "CREATE TABLE %s.%s(regionid, salescity, SalesCity) " + + "AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "SalesCity") + ); + } + + @Test // DRILL-2589 + public void whenInEqualColumnCountInTableDefVsInTableQuery() throws Exception { + ctasErrorTestHelper( + "CREATE TABLE %s.%s(regionid, salescity) " + + "AS SELECT region_id, sales_city, sales_region FROM cp.`region.json`", + "Error: table's field list and the table's query field list have different counts." + ); + } + + @Test // DRILL-2589 + public void whenTableQueryColumnHasStarAndTableFiledListIsSpecified() throws Exception { + ctasErrorTestHelper( + "CREATE TABLE %s.%s(regionid, salescity) " + + "AS SELECT region_id, * FROM cp.`region.json`", + "Error: table's query field list has a '*', which is invalid when table's field list is specified." + ); + } + + private static void ctasErrorTestHelper(final String ctasSql, final String errorMsg) + throws Exception { + final String createTableSql = String.format(ctasSql, "dfs_test.tmp", "testTableName"); + + testBuilder() + .sqlQuery(createTableSql) + .unOrdered() + .baselineColumns("ok", "summary") + .baselineValues(false, errorMsg) + .go(); + } +} http://git-wip-us.apache.org/repos/asf/drill/blob/ed02612a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestViewSupport.java ---------------------------------------------------------------------- diff --git a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestViewSupport.java b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestViewSupport.java index 2ae6991..5c2dc90 100644 --- a/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestViewSupport.java +++ b/exec/java-exec/src/test/java/org/apache/drill/exec/sql/TestViewSupport.java @@ -193,10 +193,10 @@ public class TestViewSupport extends TestBaseViewSupport { "(regionid, salescity)", "SELECT region_id, sales_city FROM cp.`region.json` ORDER BY `region_id` DESC", "SELECT regionid FROM TEST_SCHEMA.TEST_VIEW_NAME LIMIT 2", - new String[] { "regionid" }, + new String[]{"regionid"}, ImmutableList.of( - new Object[] { 109L }, - new Object[] { 108L } + new Object[]{109L}, + new Object[]{108L} ) ); } @@ -209,10 +209,10 @@ public class TestViewSupport extends TestBaseViewSupport { null, "SELECT region_id FROM cp.`region.json` UNION SELECT employee_id FROM cp.`employee.json`", "SELECT regionid FROM TEST_SCHEMA.TEST_VIEW_NAME LIMIT 2", - new String[] { "regionid" }, + new String[]{"regionid"}, ImmutableList.of( - new Object[] { 110L }, - new Object[] { 108L } + new Object[]{110L}, + new Object[]{108L} ) ); } @@ -254,9 +254,9 @@ public class TestViewSupport extends TestBaseViewSupport { null, viewDef, "SELECT * FROM TEST_SCHEMA.TEST_VIEW_NAME LIMIT 1", - new String[] { "n_nationkey", "n_name", "n_regionkey", "n_comment" }, + new String[]{"n_nationkey", "n_name", "n_regionkey", "n_comment"}, ImmutableList.of( - new Object[] { 0, "ALGERIA", 0, " haggle. carefully final deposits detect slyly agai" } + new Object[]{0, "ALGERIA", 0, " haggle. carefully final deposits detect slyly agai"} ) ); } @@ -296,8 +296,8 @@ public class TestViewSupport extends TestBaseViewSupport { // Make sure the new view created returns the data expected. queryViewHelper(String.format("SELECT * FROM %s.`%s` LIMIT 1", TEMP_SCHEMA, viewName), - new String[] { "sales_state_province" }, - ImmutableList.of(new Object[] { "None" }) + new String[]{"sales_state_province" }, + ImmutableList.of(new Object[]{"None"}) ); } finally { dropViewHelper(TEMP_SCHEMA, viewName, TEMP_SCHEMA); @@ -434,7 +434,7 @@ public class TestViewSupport extends TestBaseViewSupport { createViewHelper(TEMP_SCHEMA, viewName, TEMP_SCHEMA, null, "SELECT region_id, sales_city FROM `region.json`"); final String[] baselineColumns = new String[] { "region_id", "sales_city" }; - final List<Object[]> baselineValues = ImmutableList.of(new Object[] { 109L, "Santa Fe"}); + final List<Object[]> baselineValues = ImmutableList.of(new Object[]{109L, "Santa Fe"}); // Query the view queryViewHelper( @@ -480,4 +480,114 @@ public class TestViewSupport extends TestBaseViewSupport { dropViewHelper(TEMP_SCHEMA, viewName, TEMP_SCHEMA); } } + + @Test // DRILL-2589 + public void createViewWithDuplicateColumnsInDef1() throws Exception { + createViewErrorTestHelper( + "CREATE VIEW %s.%s AS SELECT region_id, region_id FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "region_id") + ); + } + + @Test // DRILL-2589 + public void createViewWithDuplicateColumnsInDef2() throws Exception { + createViewErrorTestHelper("CREATE VIEW %s.%s AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "sales_city") + ); + } + + @Test // DRILL-2589 + public void createViewWithDuplicateColumnsInDef3() throws Exception { + createViewErrorTestHelper( + "CREATE VIEW %s.%s(regionid, regionid) AS SELECT region_id, sales_city FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "regionid") + ); + } + + @Test // DRILL-2589 + public void createViewWithDuplicateColumnsInDef4() throws Exception { + createViewErrorTestHelper( + "CREATE VIEW %s.%s(regionid, salescity, salescity) " + + "AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "salescity") + ); + } + + @Test // DRILL-2589 + public void createViewWithDuplicateColumnsInDef5() throws Exception { + createViewErrorTestHelper( + "CREATE VIEW %s.%s(regionid, salescity, SalesCity) " + + "AS SELECT region_id, sales_city, sales_city FROM cp.`region.json`", + String.format("Error: Duplicate column name [%s]", "SalesCity") + ); + } + + @Test // DRILL-2589 + public void createViewWithDuplicateColumnsInDef6() throws Exception { + createViewErrorTestHelper( + "CREATE VIEW %s.%s " + + "AS SELECT t1.region_id, t2.region_id FROM cp.`region.json` t1 JOIN cp.`region.json` t2 " + + "ON t1.region_id = t2.region_id LIMIT 1", + String.format("Error: Duplicate column name [%s]", "region_id") + ); + } + + @Test // DRILL-2589 + public void createViewWithUniqueColsInFieldListDuplicateColsInQuery1() throws Exception { + testViewHelper( + TEMP_SCHEMA, + "(regionid1, regionid2)", + "SELECT region_id, region_id FROM cp.`region.json` LIMIT 1", + "SELECT * FROM TEST_SCHEMA.TEST_VIEW_NAME", + new String[]{"regionid1", "regionid2"}, + ImmutableList.of( + new Object[]{0L, 0L} + ) + ); + } + + @Test // DRILL-2589 + public void createViewWithUniqueColsInFieldListDuplicateColsInQuery2() throws Exception { + testViewHelper( + TEMP_SCHEMA, + "(regionid1, regionid2)", + "SELECT t1.region_id, t2.region_id FROM cp.`region.json` t1 JOIN cp.`region.json` t2 " + + "ON t1.region_id = t2.region_id LIMIT 1", + "SELECT * FROM TEST_SCHEMA.TEST_VIEW_NAME", + new String[]{"regionid1", "regionid2"}, + ImmutableList.of( + new Object[]{0L, 0L} + ) + ); + } + + @Test // DRILL-2589 + public void createViewWhenInEqualColumnCountInViewDefVsInViewQuery() throws Exception { + createViewErrorTestHelper( + "CREATE VIEW %s.%s(regionid, salescity) " + + "AS SELECT region_id, sales_city, sales_region FROM cp.`region.json`", + "Error: view's field list and the view's query field list have different counts." + ); + } + + @Test // DRILL-2589 + public void createViewWhenViewQueryColumnHasStarAndViewFiledListIsSpecified() throws Exception { + createViewErrorTestHelper( + "CREATE VIEW %s.%s(regionid, salescity) " + + "AS SELECT region_id, * FROM cp.`region.json`", + "Error: view's query field list has a '*', which is invalid when view's field list is specified." + ); + } + + private static void createViewErrorTestHelper(final String viewSql, final String errorMsg) + throws Exception { + final String createViewSql = String.format(viewSql, TEMP_SCHEMA, "duplicateColumnsInViewDef"); + + testBuilder() + .sqlQuery(createViewSql) + .unOrdered() + .baselineColumns("ok", "summary") + .baselineValues(false, errorMsg) + .go(); + } }
