[3/4] kudu git commit: Improvements and corrections to Impala CREATE TABLE examples

adar Mon, 15 Aug 2016 17:26:46 -0700

Improvements and corrections to Impala CREATE TABLE examples

Change-Id: I093972a7b806787a8c72634851796eebb5e1ae4c
Reviewed-on: http://gerrit.cloudera.org:8080/3376
Reviewed-by: Dan Burkert <[email protected]>
Tested-by: Kudu Jenkins



Project: http://git-wip-us.apache.org/repos/asf/kudu/repo
Commit: http://git-wip-us.apache.org/repos/asf/kudu/commit/5a707994
Tree: http://git-wip-us.apache.org/repos/asf/kudu/tree/5a707994
Diff: http://git-wip-us.apache.org/repos/asf/kudu/diff/5a707994

Branch: refs/heads/master
Commit: 5a707994290e5d601985bf2f2ea6c822b6c0c330
Parents: 274dfb0
Author: Misty Stanley-Jones <[email protected]>
Authored: Mon Jun 13 09:10:33 2016 -0700
Committer: Misty Stanley-Jones <[email protected]>
Committed: Mon Aug 15 23:42:33 2016 +0000

----------------------------------------------------------------------
 docs/kudu_impala_integration.adoc | 66 ++++++++++++++++++----------------
 1 file changed, 36 insertions(+), 30 deletions(-)
----------------------------------------------------------------------


http://git-wip-us.apache.org/repos/asf/kudu/blob/5a707994/docs/kudu_impala_integration.adoc
----------------------------------------------------------------------
diff --git a/docs/kudu_impala_integration.adoc 
b/docs/kudu_impala_integration.adoc
index fb29bd8..16be5cd 100755
--- a/docs/kudu_impala_integration.adoc
+++ b/docs/kudu_impala_integration.adoc
@@ -372,7 +372,7 @@ CREATE TABLE my_first_table
   id BIGINT,
   name STRING
 )
-DISTRIBUTE BY HASH (id) INTO 16 BUCKETS
+DISTRIBUTE BY HASH INTO 16 BUCKETS
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'my_first_table',
@@ -404,6 +404,7 @@ property. In this example, the primary key columns are `ts` 
and `name`.
 [source,sql]
 ----
 CREATE TABLE new_table
+DISTRIBUTE BY HASH INTO 16 BUCKETS
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'new_table',
@@ -430,8 +431,8 @@ like `SELECT name as new_name`.
 Tables are divided into tablets which are each served by one or more tablet
 servers. Ideally, tablets should split a table's data relatively equally. Kudu 
currently
 has no mechanism for automatically (or manually) splitting a pre-existing 
tablet.
-Until this feature has been implemented, you must pre-split your table when 
you create
-it, When designing your table schema, consider primary keys that will allow 
you to
+Until this feature has been implemented, *you must pre-split your table when 
you create
+it*. When designing your table schema, consider primary keys that will allow 
you to
 pre-split your table into tablets which grow at similar rates. You can provide 
split
 points using a `DISTRIBUTE BY` clause when creating a table using Impala:
 
@@ -455,13 +456,13 @@ CREATE TABLE cust_behavior (
   rating INT,
   fulfilled_date BIGINT
 )
-DISTRIBUTE BY RANGE(_id)
-SPLIT ROWS((1439560049342),
-           (1439566253755),
-           (1439572458168),
-           (1439578662581),
-           (1439584866994),
-           (1439591071407))
+DISTRIBUTE BY RANGE (_id)
+  SPLIT ROWS((1439560049342),
+             (1439566253755),
+             (1439572458168),
+             (1439578662581),
+             (1439584866994),
+             (1439591071407))
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'cust_behavior',
@@ -491,12 +492,13 @@ use the following SQL:
 
 [source,sql]
 ----
-CREATE DATABASE impala_kudu
+CREATE DATABASE impala_kudu;
 USE impala_kudu;
 CREATE TABLE my_first_table (
   id BIGINT,
   name STRING
 )
+DISTRIBUTE BY HASH INTO 4 BUCKETS
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'my_first_table',
@@ -605,13 +607,13 @@ CREATE TABLE customers (
   name STRING,
   purchase_count int32,
 )
-DISTRIBUTE BY RANGE(state)
-SPLIT ROWS(('al'),
-           ('ak'),
-           ('ar'),
-           ...
-           ('wv'),
-           ('wy'))
+DISTRIBUTE BY RANGE (state)
+  SPLIT ROWS (('al'),
+              ('ak'),
+              ('ar'),
+              ...
+              ('wv'),
+              ('wy'))
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'customers',
@@ -666,7 +668,7 @@ CREATE TABLE cust_behavior (
   rating INT,
   fulfilled_date BIGINT
 )
-DISTRIBUTE BY HASH (id) INTO 16 BUCKETS
+DISTRIBUTE BY HASH INTO 16 BUCKETS
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'cust_behavior',
@@ -701,14 +703,14 @@ CREATE TABLE customers (
   name STRING,
   purchase_count int32,
 )
-DISTRIBUTE BY RANGE(state, name)
-  SPLIT ROWS(('al', ''),
-             ('al', 'm'),
-             ('ak', ''),
-             ('ak', 'm'),
-             ...
-             ('wy', ''),
-             ('wy', 'm'))
+DISTRIBUTE BY RANGE (state, name)
+  SPLIT ROWS (('al', ''),
+              ('al', 'm'),
+              ('ak', ''),
+              ('ak', 'm'),
+              ...
+              ('wy', ''),
+              ('wy', 'm'))
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'customers',
@@ -728,6 +730,10 @@ based upon the value of the `sku` string. Writes are 
spread across at least four
 (and possibly up to 16). When you query for a contiguous range of `sku` 
values, you have a
 good chance of only needing to read from a quarter of the tablets to fulfill 
the query.
 
+NOTE: By default, the entire primary key is hashed when you use `DISTRIBUTE BY 
HASH`.
+To hash on only part of the primary key, specify it by using syntax like 
`DISTRIBUTE
+BY HASH (id, sku)`.
+
 [source,sql]
 ----
 CREATE TABLE cust_behavior (
@@ -747,9 +753,9 @@ CREATE TABLE cust_behavior (
 )
 DISTRIBUTE BY HASH (id) INTO 4 BUCKETS,
 RANGE (sku)
-  SPLIT ROWS(('g'),
-             ('o'),
-             ('u'))
+  SPLIT ROWS (('g'),
+              ('o'),
+              ('u'))
 TBLPROPERTIES(
   'storage_handler' = 'com.cloudera.kudu.hive.KuduStorageHandler',
   'kudu.table_name' = 'cust_behavior',

[3/4] kudu git commit: Improvements and corrections to Impala CREATE TABLE examples

Reply via email to