IMPALA-5546: Allow creating unpartitioned Kudu tables

This patch makes it possible to create unpartitioned, managed Kudu
tables from Impala, by making the 'PARTITION BY' clause of 'CREATE
TABLE... STORED AS KUDU' optional:

CREATE TABLE [IF NOT EXISTS] [db_name.]table_name
  (col_name data_type
    [kudu_column_attribute ...]
    [COMMENT 'col_comment']
    [, ...]
    [PRIMARY KEY (col_name[, ...])]
  )
  [PARTITION BY kudu_partition_clause]
  [COMMENT 'table_comment']
  STORED AS KUDU
  [TBLPROPERTIES ('key1'='value1', 'key2'='value2', ...)]

Kudu represents this as a table that is range partitioned on no
columns.

Because unpartitioned Kudu tables are inefficient for large data
sizes, and because the syntax doesn't make it explicit that the table
will be unpartitioned, a warning is issued to encourage users
to create partitioned tables.

This patch also converts the tpch_kudu.nation and tpch_kudu.region
tables to be unpartitioned, as they are very small.

Testing:
- Updated analysis tests.
- Added e2e test that creates an unpartitioned table and inserts into it.

Change-Id: I281f173dbec1484eb13434d53ea581a0f245358a
Reviewed-on: http://gerrit.cloudera.org:8080/7446
Reviewed-by: Thomas Tauber-Marshall <tmarsh...@cloudera.com>
Tested-by: Impala Public Jenkins


Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo
Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/b881fba7
Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/b881fba7
Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/b881fba7

Branch: refs/heads/master
Commit: b881fba7633f138c251532f3c58255689db4e22b
Parents: 3deb1a9
Author: Thomas Tauber-Marshall <tmarsh...@cloudera.com>
Authored: Mon Jul 17 12:25:45 2017 -0700
Committer: Impala Public Jenkins <impala-public-jenk...@gerrit.cloudera.org>
Committed: Mon Aug 7 19:53:59 2017 +0000

----------------------------------------------------------------------
 .../apache/impala/analysis/CreateTableStmt.java |  4 +-
 .../impala/service/KuduCatalogOpExecutor.java   |  4 ++
 .../apache/impala/analysis/AnalyzeDDLTest.java  | 15 +++++---
 testdata/datasets/tpch/tpch_kudu_template.sql   |  2 -
 testdata/datasets/tpch/tpch_schema_template.sql |  4 +-
 .../queries/QueryTest/kudu_create.test          | 40 ++++++++++++++++++++
 6 files changed, 58 insertions(+), 11 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
----------------------------------------------------------------------
diff --git a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java 
b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
index 17ac46d..6169997 100644
--- a/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
+++ b/fe/src/main/java/org/apache/impala/analysis/CreateTableStmt.java
@@ -320,8 +320,8 @@ public class CreateTableStmt extends StatementBase {
     if (!getKuduPartitionParams().isEmpty()) {
       analyzeKuduPartitionParams(analyzer);
     } else {
-      throw new AnalysisException("Table partitioning must be specified for " +
-          "managed Kudu tables.");
+      analyzer.addWarning(
+          "Unpartitioned Kudu tables are inefficient for large data sizes.");
     }
   }
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java
----------------------------------------------------------------------
diff --git 
a/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java 
b/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java
index cbbfccf..c81aca4 100644
--- a/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java
+++ b/fe/src/main/java/org/apache/impala/service/KuduCatalogOpExecutor.java
@@ -181,6 +181,10 @@ public class KuduCatalogOpExecutor {
       if (!hasRangePartitioning) {
         tableOpts.setRangePartitionColumns(Collections.<String>emptyList());
       }
+    } else {
+      // This table is unpartitioned, which Kudu represents as a table range 
partitioned
+      // on no columns.
+      tableOpts.setRangePartitionColumns(Collections.<String>emptyList());
     }
 
     // Set the number of table replicas, if specified.

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
----------------------------------------------------------------------
diff --git a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java 
b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
index 6928ed2..2a3e383 100644
--- a/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
+++ b/fe/src/test/java/org/apache/impala/analysis/AnalyzeDDLTest.java
@@ -1587,6 +1587,10 @@ public class AnalyzeDDLTest extends FrontendTestBase {
         "partition value = 30) stored as kudu as select id, bool_col, 
tinyint_col, " +
         "smallint_col, int_col, bigint_col, float_col, double_col, 
date_string_col, " +
         "string_col from functional.alltypestiny");
+    // Creating unpartitioned table results in a warning.
+    AnalyzesOk("create table t primary key(id) stored as kudu as select id, 
bool_col " +
+        "from functional.alltypestiny",
+        "Unpartitioned Kudu tables are inefficient for large data sizes.");
     // CTAS in an external Kudu table
     AnalysisError("create external table t stored as kudu " +
         "tblproperties('kudu.table_name'='t') as select id, int_col from " +
@@ -2197,9 +2201,10 @@ public class AnalyzeDDLTest extends FrontendTestBase {
     AnalysisError("create table tab (x int) tblproperties (" +
         "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler')",
         CreateTableStmt.KUDU_STORAGE_HANDLER_ERROR_MESSAGE);
-    AnalysisError("create table tab (x int primary key) stored as kudu 
tblproperties (" +
+    // Creating unpartitioned table results in a warning.
+    AnalyzesOk("create table tab (x int primary key) stored as kudu 
tblproperties (" +
         "'storage_handler'='com.cloudera.kudu.hive.KuduStorageHandler')",
-        "Table partitioning must be specified for managed Kudu tables.");
+        "Unpartitioned Kudu tables are inefficient for large data sizes.");
     // Invalid value for number of replicas
     AnalysisError("create table t (x int primary key) stored as kudu 
tblproperties (" +
         "'kudu.num_tablet_replicas'='1.1')",
@@ -2211,9 +2216,9 @@ public class AnalyzeDDLTest extends FrontendTestBase {
     AnalysisError("create table tab (a int primary key) partition by hash (a) 
" +
         "partitions 3 stored as kudu location '/test-warehouse/'",
         "LOCATION cannot be specified for a Kudu table.");
-    // PARTITION BY is required for managed tables.
-    AnalysisError("create table tab (a int, primary key (a)) stored as kudu",
-        "Table partitioning must be specified for managed Kudu tables.");
+    // Creating unpartitioned table results in a warning.
+    AnalyzesOk("create table tab (a int, primary key (a)) stored as kudu",
+        "Unpartitioned Kudu tables are inefficient for large data sizes.");
     AnalysisError("create table tab (a int) stored as kudu",
         "A primary key is required for a Kudu table.");
     // Using ROW FORMAT with a Kudu table

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/testdata/datasets/tpch/tpch_kudu_template.sql
----------------------------------------------------------------------
diff --git a/testdata/datasets/tpch/tpch_kudu_template.sql 
b/testdata/datasets/tpch/tpch_kudu_template.sql
index 62fa072..032a19a 100644
--- a/testdata/datasets/tpch/tpch_kudu_template.sql
+++ b/testdata/datasets/tpch/tpch_kudu_template.sql
@@ -122,7 +122,6 @@ CREATE TABLE IF NOT EXISTS {target_db_name}.nation (
   N_REGIONKEY BIGINT,
   N_COMMENT STRING
 )
-partition by hash (n_nationkey) partitions {buckets}
 STORED AS KUDU
 tblproperties ('kudu.master_addresses' = '{kudu_master}:7051');
 
@@ -134,7 +133,6 @@ CREATE TABLE IF NOT EXISTS {target_db_name}.region (
   R_NAME STRING,
   R_COMMENT STRING
 )
-partition by hash (r_regionkey) partitions {buckets}
 STORED AS KUDU
 tblproperties ('kudu.master_addresses' = '{kudu_master}:7051');
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/testdata/datasets/tpch/tpch_schema_template.sql
----------------------------------------------------------------------
diff --git a/testdata/datasets/tpch/tpch_schema_template.sql 
b/testdata/datasets/tpch/tpch_schema_template.sql
index 2f99fd0..dfba06d 100644
--- a/testdata/datasets/tpch/tpch_schema_template.sql
+++ b/testdata/datasets/tpch/tpch_schema_template.sql
@@ -179,7 +179,7 @@ create table if not exists 
{db_name}{db_suffix}.{table_name} (
   N_REGIONKEY SMALLINT,
   N_COMMENT STRING
 )
-partition by hash (n_nationkey) partitions 9 stored as kudu;
+stored as kudu;
 ---- DEPENDENT_LOAD
 INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM 
{db_name}.{table_name};
 ---- LOAD
@@ -202,7 +202,7 @@ create table if not exists 
{db_name}{db_suffix}.{table_name} (
   R_NAME STRING,
   R_COMMENT STRING
 )
-partition by hash (r_regionkey) partitions 9 stored as kudu;
+stored as kudu;
 ---- DEPENDENT_LOAD
 INSERT OVERWRITE TABLE {db_name}{db_suffix}.{table_name} SELECT * FROM 
{db_name}.{table_name};
 ---- LOAD

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/b881fba7/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
----------------------------------------------------------------------
diff --git 
a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test 
b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
index 4aaed16..f6e16e1 100644
--- a/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
+++ b/testdata/workloads/functional-query/queries/QueryTest/kudu_create.test
@@ -248,3 +248,43 @@ I, TS1, TS2
 ---- TYPES
 INT,TIMESTAMP,TIMESTAMP
 ====
+---- QUERY
+# create an unpartitioned table
+create table unpartitioned_kudu_table (col0 bigint primary key, col1 string)
+stored as kudu
+---- RESULTS
+---- ERRORS
+Unpartitioned Kudu tables are inefficient for large data sizes.
+====
+---- QUERY
+insert into unpartitioned_kudu_table values (0, 'zero'), (1, 'one')
+---- RUNTIME_PROFILE
+NumModifiedRows: 2
+NumRowErrors: 0
+---- LABELS
+COL0,COL1
+---- DML_RESULTS: unpartitioned_kudu_table
+0,'zero'
+1,'one'
+---- TYPES
+BIGINT,STRING
+====
+---- QUERY
+create table unpartitioned_kudu_table2 primary key(id) stored as kudu
+as select id from functional.alltypestiny where id > 4
+---- RESULTS
+'Inserted 3 row(s)'
+---- ERRORS
+Unpartitioned Kudu tables are inefficient for large data sizes.
+====
+---- QUERY
+select * from unpartitioned_kudu_table2
+---- RESULTS
+5
+6
+7
+---- LABELS
+ID
+---- TYPES
+INT
+====
\ No newline at end of file

Reply via email to