IMPALA-5546: Allow creating unpartitioned Kudu tables This patch makes it possible to create unpartitioned, managed Kudu tables from Impala, by making the 'PARTITION BY' clause of 'CREATE TABLE... STORED AS KUDU' optional:
CREATE TABLE [IF NOT EXISTS] [db_name.]table_name (col_name data_type [kudu_column_attribute ...] [COMMENT 'col_comment'] [, ...] [PRIMARY KEY (col_name[, ...])] ) [PARTITION BY kudu_partition_clause] [COMMENT 'table_comment'] STORED AS KUDU [TBLPROPERTIES ('key1'='value1', 'key2'='value2', ...)] Kudu represents this as a table that is range partitioned on no columns. Because unpartitioned Kudu tables are inefficient for large data sizes, and because the syntax doesn't make it explicit that the table will be unpartitioned, there is a warning issued to encourage users to create partitioned tables. This patch also converts the tpch_kudu.nation and tpch_kudu.region tables to be unpartitioned, as they are very small. Testing: - Updated analysis tests. - Added e2e test that creates unpartitioned table and inserts into it. Change-Id: I281f173dbec1484eb13434d53ea581a0f245358a Reviewed-on: http://gerrit.cloudera.org:8080/7446 Reviewed-by: Thomas Tauber-Marshall <tmarsh...@cloudera.com> Tested-by: Impala Public Jenkins Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b881fba7 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b881fba7 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b881fba7 Branch: refs/heads/master Commit: b881fba7633f138c251532f3c58255689db4e22b Parents: 3deb1a9 Author: Thomas Tauber-Marshall <tmarsh...@cloudera.com> Authored: Mon Jul 17 12:25:45 2017 -0700 Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org> Committed: Mon Aug 7 19:53:59 2017 +0000 ---------------------------------------------------------------------- .../apache/impala/analysis/CreateTableStmt.java | 4 +- .../impala/service/KuduCatalogOpExecutor.java | 4 ++ .../apache/impala/analysis/AnalyzeDDLTest.java | 15 +++++--- testdata/datasets/tpch/tpch_kudu_template.sql | 2 - testdata/datasets/tpch/tpch_schema_template.sql | 4 +- 
.../queries/QueryTest/kudu_create.test | 40 ++++++++++++++++++++ 6 files changed, 58 insertions(+), 11 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java index 17ac46d..6169997 100644 --- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java +++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java @@ -320,8 +320,8 @@ public class CreateTableStmt extends StatementBase { if (!getKuduPartitionParams().isEmpty()) { analyzeKuduPartitionParams(analyzer); } else { - throw new AnalysisException("Table partitioning must be specified for " + - "managed Kudu tables."); + analyzer.addWarning( + "Unpartitioned Kudu tables are inefficient for large data sizes."); } } http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java ---------------------------------------------------------------------- diff --git a/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java b/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java index cbbfccf..c81aca4 100644 --- a/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java +++ b/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java @@ -181,6 +181,10 @@ public class KuduCatalogOpExecutor { if (!hasRangePartitioning) { tableOpts.setRangePartitionColumns(Collections.<String>emptyList()); } + } else { + // This table is unpartitioned, which Kudu represents as a table range partitioned + // on no columns. 
+ tableOpts.setRangePartitionColumns(Collections.<String>emptyList()); } // Set the number of table replicas, if specified. http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java ---------------------------------------------------------------------- diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java index 6928ed2..2a3e383 100644 --- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java +++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java @@ -1587,6 +1587,10 @@ public class AnalyzeDDLTest extends FrontendTestBase { "partition value = 30) stored as kudu as select id, bool_col, tinyint_col, " + "smallint_col, int_col, bigint_col, float_col, double_col, date_string_col, " + "string_col from functional.alltypestiny"); + // Creating unpartitioned table results in a warning. + AnalyzesOk("create table t primary key(id) stored as kudu as select id, bool_col " + + "from functional.alltypestiny", + "Unpartitioned Kudu tables are inefficient for large data sizes."); // CTAS in an external Kudu table AnalysisError("create external table t stored as kudu " + "tblproperties('kudu.table_name'='t') as select id, int_col from " + @@ -2197,9 +2201,10 @@ public class AnalyzeDDLTest extends FrontendTestBase { AnalysisError("create table tab (x int) tblproperties (" + "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler')", CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE); - AnalysisError("create table tab (x int primary key) stored as kudu tblproperties (" + + // Creating unpartitioned table results in a warning. 
+ AnalyzesOk("create table tab (x int primary key) stored as kudu tblproperties (" + "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler')", - "Table partitioning must be specified for managed Kudu tables."); + "Unpartitioned Kudu tables are inefficient for large data sizes."); // Invalid value for number of replicas AnalysisError("create table t (x int primary key) stored as kudu tblproperties (" + "'kudu.num_tablet_replicas'='1.1')", @@ -2211,9 +2216,9 @@ public class AnalyzeDDLTest extends FrontendTestBase { AnalysisError("create table tab (a int primary key) partition by hash (a) " + "partitions 3 stored as kudu location '/test-warehouse/'", "LOCATION cannot be specified for a Kudu table."); - // PARTITION BY is required for managed tables. - AnalysisError("create table tab (a int, primary key (a)) stored as kudu", - "Table partitioning must be specified for managed Kudu tables."); + // Creating unpartitioned table results in a warning. + AnalyzesOk("create table tab (a int, primary key (a)) stored as kudu", + "Unpartitioned Kudu tables are inefficient for large data sizes."); AnalysisError("create table tab (a int) stored as kudu", "A primary key is required for a Kudu table."); // Using ROW FORMAT with a Kudu table http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/testdata/datasets/tpch/tpch_kudu_template.sql ---------------------------------------------------------------------- diff --git a/testdata/datasets/tpch/tpch_kudu_template.sql b/testdata/datasets/tpch/tpch_kudu_template.sql index 62fa072..032a19a 100644 --- a/testdata/datasets/tpch/tpch_kudu_template.sql +++ b/testdata/datasets/tpch/tpch_kudu_template.sql @@ -122,7 +122,6 @@ CREATE TABLE IF NOT EXISTS {target_db_name}.nation ( N_REGIONKEY BIGINT, N_COMMENT STRING ) -partition by hash (n_nationkey) partitions {buckets} STORED AS KUDU tblproperties ('kudu.master_addresses' = '{kudu_master}:7051'); @@ -134,7 +133,6 @@ CREATE TABLE IF NOT EXISTS {target_db_name}.region ( 
R_NAME STRING, R_COMMENT STRING ) -partition by hash (r_regionkey) partitions {buckets} STORED AS KUDU tblproperties ('kudu.master_addresses' = '{kudu_master}:7051'); http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/testdata/datasets/tpch/tpch_schema_template.sql ---------------------------------------------------------------------- diff --git a/testdata/datasets/tpch/tpch_schema_template.sql b/testdata/datasets/tpch/tpch_schema_template.sql index 2f99fd0..dfba06d 100644 --- a/testdata/datasets/tpch/tpch_schema_template.sql +++ b/testdata/datasets/tpch/tpch_schema_template.sql @@ -179,7 +179,7 @@ create table if not exists {db_name}{db_suffix}.{table_name} ( N_REGIONKEY SMALLINT, N_COMMENT STRING ) -partition by hash (n_nationkey) partitions 9 stored as kudu; +stored as kudu; ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD @@ -202,7 +202,7 @@ create table if not exists {db_name}{db_suffix}.{table_name} ( R_NAME STRING, R_COMMENT STRING ) -partition by hash (r_regionkey) partitions 9 stored as kudu; +stored as kudu; ---- DEPENDENT_LOAD INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM {db_name}.{table_name}; ---- LOAD http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test ---------------------------------------------------------------------- diff --git a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test index 4aaed16..f6e16e1 100644 --- a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test +++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test @@ -248,3 +248,43 @@ I, TS1, TS2 ---- TYPES INT,TIMESTAMP,TIMESTAMP ==== +---- QUERY +# create an unpartitioned table +create table unpartitioned_kudu_table (col0 bigint primary key, col1 
string) +stored as kudu +---- RESULTS +---- ERRORS +Unpartitioned Kudu tables are inefficient for large data sizes. +==== +---- QUERY +insert into unpartitioned_kudu_table values (0, 'zero'), (1, 'one') +---- RUNTIME_PROFILE +NumModifiedRows: 2 +NumRowErrors: 0 +---- LABELS +COL0,COL1 +---- DML_RESULTS: unpartitioned_kudu_table +0,'zero' +1,'one' +---- TYPES +BIGINT,STRING +==== +---- QUERY +create table unpartitioned_kudu_table2 primary key(id) stored as kudu +as select id from functional.alltypestiny where id > 4 +---- RESULTS +'Inserted 3 row(s)' +---- ERRORS +Unpartitioned Kudu tables are inefficient for large data sizes. +==== +---- QUERY +select * from unpartitioned_kudu_table2 +---- RESULTS +5 +6 +7 +---- LABELS +ID +---- TYPES +INT +==== \ No newline at end of file