http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_alter_table.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_alter_table.xml 
b/docs/topics/impala_alter_table.xml
index 800261a..5e6e7a0 100644
--- a/docs/topics/impala_alter_table.xml
+++ b/docs/topics/impala_alter_table.xml
@@ -3,7 +3,7 @@
 <concept id="alter_table">
 
   <title>ALTER TABLE Statement</title>
-  <titlealts><navtitle>ALTER TABLE</navtitle></titlealts>
+  <titlealts audience="PDF"><navtitle>ALTER TABLE</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -13,6 +13,9 @@
       <data name="Category" value="HDFS Caching"/>
       <data name="Category" value="Tables"/>
       <data name="Category" value="Schemas"/>
+      <data name="Category" value="S3"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
     </metadata>
   </prolog>
 
@@ -20,12 +23,14 @@
 
     <p>
       <indexterm audience="Cloudera">ALTER TABLE statement</indexterm>
-      The <codeph>ALTER TABLE</codeph> statement changes the structure or 
properties of an existing table. In
-      Impala, this is a logical operation that updates the table metadata in 
the metastore database that Impala
-      shares with Hive; <codeph>ALTER TABLE</codeph> does not actually 
rewrite, move, and so on the actual data
-      files. Thus, you might need to perform corresponding physical filesystem 
operations, such as moving data
-      files to a different HDFS directory, rewriting the data files to include 
extra fields, or converting them to
-      a different file format.
+      The <codeph>ALTER TABLE</codeph> statement changes the structure or 
properties of an existing Impala table.
+    </p>
+    <p>
+      In Impala, this is primarily a logical operation that updates the table 
metadata in the metastore database that Impala
+      shares with Hive. Most <codeph>ALTER TABLE</codeph> operations do not 
actually rewrite, move, and so on the actual data
+      files. (The <codeph>RENAME TO</codeph> clause is the one exception; it 
can cause HDFS files to be moved to different paths.)
+      When you do an <codeph>ALTER TABLE</codeph> operation, you typically 
need to perform corresponding physical filesystem operations,
+      such as rewriting the data files to include extra fields, or converting 
them to a different file format.
     </p>
 
     <p conref="../shared/impala_common.xml#common/syntax_blurb"/>
@@ -37,7 +42,8 @@ ALTER TABLE <varname>name</varname> DROP [COLUMN] 
<varname>column_name</varname>
 ALTER TABLE <varname>name</varname> CHANGE <varname>column_name</varname> 
<varname>new_name</varname> <varname>new_type</varname>
 ALTER TABLE <varname>name</varname> REPLACE COLUMNS 
(<varname>col_spec</varname>[, <varname>col_spec</varname> ...])
 
-ALTER TABLE <varname>name</varname> { ADD | DROP } PARTITION 
(<varname>partition_spec</varname>) <ph rev="2.3.0">[PURGE]</ph>
+ALTER TABLE <varname>name</varname> { ADD [IF NOT EXISTS] | DROP [IF EXISTS] } 
PARTITION (<varname>partition_spec</varname>) <ph rev="2.3.0">[PURGE]</ph>
+<ph rev="2.3.0 IMPALA-1568 CDH-36799">ALTER TABLE <varname>name</varname> 
RECOVER PARTITIONS</ph>
 
 ALTER TABLE <varname>name</varname> [PARTITION 
(<varname>partition_spec</varname>)]
   SET { FILEFORMAT <varname>file_format</varname>
@@ -45,6 +51,11 @@ ALTER TABLE <varname>name</varname> [PARTITION 
(<varname>partition_spec</varname
   | TBLPROPERTIES (<varname>table_properties</varname>)
   | SERDEPROPERTIES (<varname>serde_properties</varname>) }
 
+<ph rev="2.6.0 IMPALA-3369">ALTER TABLE <varname>name</varname> SET COLUMN STATS 
<varname>colname</varname>
+  ('<varname>statsKey</varname>'='<varname>val</varname>', ...)
+
+statsKey ::= numDVs | numNulls | avgSize | maxSize</ph>
+
 <ph rev="1.4.0">ALTER TABLE <varname>name</varname> [PARTITION 
(<varname>partition_spec</varname>)] SET { CACHED IN 
'<varname>pool_name</varname>' <ph rev="2.2.0">[WITH REPLICATION = 
<varname>integer</varname>]</ph> | UNCACHED }</ph>
 
 <varname>new_name</varname> ::= 
[<varname>new_database</varname>.]<varname>new_table_name</varname>
@@ -65,7 +76,7 @@ ALTER TABLE <varname>name</varname> [PARTITION 
(<varname>partition_spec</varname
     <p conref="../shared/impala_common.xml#common/complex_types_blurb"/>
 
     <p rev="2.3.0">
-      In CDH 5.5 / Impala 2.3 and higher, the <codeph>ALTER TABLE</codeph> 
statement can 
+      In CDH 5.5 / Impala 2.3 and higher, the <codeph>ALTER TABLE</codeph> 
statement can
       change the metadata for tables containing complex types 
(<codeph>ARRAY</codeph>,
       <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>).
       For example, you can use an <codeph>ADD COLUMNS</codeph>, <codeph>DROP 
COLUMN</codeph>, or <codeph>CHANGE</codeph>
@@ -74,7 +85,7 @@ ALTER TABLE <varname>name</varname> [PARTITION 
(<varname>partition_spec</varname
       <codeph>ALTER TABLE</codeph> statement applies to all file formats.
       For example, you can use Impala to update metadata for a staging table 
in a non-Parquet file format where the
       data is populated by Hive. Or you can use <codeph>ALTER TABLE SET 
FILEFORMAT</codeph> to change the format
-      of an existing table to Parquet so that Impala can query it. (Remember 
that changing the file format for a table does
+      of an existing table to Parquet so that Impala can query it. Remember 
that changing the file format for a table does
       not convert the data files within the table; you must prepare any 
Parquet data files containing complex types
       outside Impala, and bring them into the table using <codeph>LOAD 
DATA</codeph> or updating the table's
       <codeph>LOCATION</codeph> property.
@@ -96,12 +107,16 @@ ALTER TABLE <varname>name</varname> [PARTITION 
(<varname>partition_spec</varname
     </p>
 
     <p conref="../shared/impala_common.xml#common/s3_blurb"/>
-    <p rev="2.2.0">
-      You can specify an <codeph>s3a://</codeph> prefix in the 
<codeph>LOCATION</codeph> attribute of a table or partition
-      to make Impala query data from the Amazon S3 filesystem.
-      See <xref href="impala_s3.xml#s3"/> for details.
+
+    <p rev="2.6.0 CDH-39913 IMPALA-1878">
+      You can specify an <codeph>s3a://</codeph> prefix on the 
<codeph>LOCATION</codeph> attribute of a table or partition
+      to make Impala query data from the Amazon S3 filesystem. In CDH 5.8 / 
Impala 2.6 and higher, Impala automatically
+      handles creating or removing the associated folders when you issue 
<codeph>ALTER TABLE</codeph> statements
+      with the <codeph>ADD PARTITION</codeph> or <codeph>DROP 
PARTITION</codeph> clauses.
     </p>
 
+    <p conref="../shared/impala_common.xml#common/s3_ddl"/>
+
     <p rev="1.4.0">
       <b>HDFS caching (CACHED IN clause):</b>
     </p>
@@ -135,7 +150,7 @@ ALTER TABLE <varname>name</varname> [PARTITION 
(<varname>partition_spec</varname
     </p>
 
     <p>
-      For internal tables, his operation physically renames the directory 
within HDFS that contains the data files;
+      For internal tables, this operation physically renames the directory 
within HDFS that contains the data files;
       the original directory name no longer exists. By qualifying the table 
names with database names, you can use
       this technique to move an internal table (and its associated data 
directory) from one database to another.
       For example:
@@ -189,6 +204,121 @@ alter table p1 partition (month=1, day=1) set location 
'/usr/external_data/new_y
 
     <note 
conref="../shared/impala_common.xml#common/add_partition_set_location"/>
 
+    <p rev="2.3.0 IMPALA-1568 CDH-36799">
+      <b>To automatically detect new partition directories added through Hive 
or HDFS operations:</b>
+    </p>
+
+    <p rev="2.3.0 IMPALA-1568 CDH-36799">
+      In CDH 5.5 / Impala 2.3 and higher, the <codeph>RECOVER 
PARTITIONS</codeph> clause scans
+      a partitioned table to detect if any new partition directories were 
added outside of Impala,
+      such as by Hive <codeph>ALTER TABLE</codeph> statements or by 
<cmdname>hdfs dfs</cmdname>
+      or <cmdname>hadoop fs</cmdname> commands. The <codeph>RECOVER 
PARTITIONS</codeph> clause
+      automatically recognizes any data files present in these new 
directories, the same as
+      the <codeph>REFRESH</codeph> statement does.
+    </p>
+
+    <p rev="2.3.0 IMPALA-1568 CDH-36799">
+      For example, here is a sequence of examples showing how you might create 
a partitioned table in Impala,
+      create new partitions through Hive, copy data files into the new 
partitions with the <cmdname>hdfs</cmdname>
+      command, and have Impala recognize the new partitions and new data:
+    </p>
+
+    <p rev="2.3.0 IMPALA-1568 CDH-36799">
+      In Impala, create the table, and a single partition for demonstration 
purposes:
+    </p>
+
+<codeblock rev="2.3.0 IMPALA-1568 CDH-36799">
+<![CDATA[
+create database recover_partitions;
+use recover_partitions;
+create table t1 (s string) partitioned by (yy int, mm int);
+insert into t1 partition (yy = 2016, mm = 1) values ('Partition exists');
+show files in t1;
++---------------------------------------------------------------------+------+--------------+
+| Path                                                                | Size | 
Partition    |
++---------------------------------------------------------------------+------+--------------+
+| /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=1/data.txt | 17B  | 
yy=2016/mm=1 |
++---------------------------------------------------------------------+------+--------------+
+quit;
+]]>
+</codeblock>
+
+    <p rev="2.3.0 IMPALA-1568 CDH-36799">
+      In Hive, create some new partitions. In a real use case, you might 
create the
+      partitions and populate them with data as the final stages of an ETL 
pipeline.
+    </p>
+
+<codeblock rev="2.3.0 IMPALA-1568 CDH-36799">
+<![CDATA[
+hive> use recover_partitions;
+OK
+hive> alter table t1 add partition (yy = 2016, mm = 2);
+OK
+hive> alter table t1 add partition (yy = 2016, mm = 3);
+OK
+hive> quit;
+]]>
+</codeblock>
+
+    <p rev="2.3.0 IMPALA-1568 CDH-36799">
+      For demonstration purposes, manually copy data (a single row) into these
+      new partitions, using manual HDFS operations:
+    </p>
+
+<codeblock rev="2.3.0 IMPALA-1568 CDH-36799">
+<![CDATA[
+$ hdfs dfs -ls /user/hive/warehouse/recover_partitions.db/t1/yy=2016/
+Found 3 items
+drwxr-xr-x - impala   hive 0 2016-05-09 16:06 
/user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=1
+drwxr-xr-x - jrussell hive 0 2016-05-09 16:14 
/user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=2
+drwxr-xr-x - jrussell hive 0 2016-05-09 16:13 
/user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=3
+
+$ hdfs dfs -cp 
/user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=1/data.txt \
+  /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=2/data.txt
+$ hdfs dfs -cp 
/user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=1/data.txt \
+  /user/hive/warehouse/recover_partitions.db/t1/yy=2016/mm=3/data.txt
+]]>
+</codeblock>
+
+<codeblock rev="2.3.0 IMPALA-1568 CDH-36799">
+<![CDATA[
+hive> select * from t1;
+OK
+Partition exists  2016  1
+Partition exists  2016  2
+Partition exists  2016  3
+hive> quit;
+]]>
+</codeblock>
+
+    <p rev="2.3.0 IMPALA-1568 CDH-36799">
+      In Impala, initially the partitions and data are not visible.
+      Running <codeph>ALTER TABLE</codeph> with the <codeph>RECOVER 
PARTITIONS</codeph>
+      clause scans the table data directory to find any new partition 
directories, and
+      the data files inside them:
+    </p>
+
+<codeblock rev="2.3.0 IMPALA-1568 CDH-36799">
+<![CDATA[
+select * from t1;
++------------------+------+----+
+| s                | yy   | mm |
++------------------+------+----+
+| Partition exists | 2016 | 1  |
++------------------+------+----+
+
+alter table t1 recover partitions;
+select * from t1;
++------------------+------+----+
+| s                | yy   | mm |
++------------------+------+----+
+| Partition exists | 2016 | 1  |
+| Partition exists | 2016 | 3  |
+| Partition exists | 2016 | 2  |
++------------------+------+----+
+]]>
+</codeblock>
+
     <p rev="1.2">
       <b>To change the key-value pairs of the TBLPROPERTIES and 
SERDEPROPERTIES fields:</b>
     </p>
@@ -220,9 +350,39 @@ yes,no</codeblock>
     <p>
       Use the <codeph>DESCRIBE FORMATTED</codeph> statement to see the current 
values of these properties for an
       existing table. See <xref href="impala_create_table.xml#create_table"/> 
for more details about these clauses.
-      See <xref href="impala_perf_stats.xml#perf_stats_manual"/> for an 
example of using table properties to
+      See <xref href="impala_perf_stats.xml#perf_table_stats_manual"/> for an 
example of using table properties to
       fine-tune the performance-related table statistics.
     </p>
+      
+    <p>
+      <b>To manually set or update table or column statistics:</b>
+    </p>
+
+    <p>
+      Although for most tables the <codeph>COMPUTE STATS</codeph> or 
<codeph>COMPUTE INCREMENTAL STATS</codeph>
+      statement is all you need to keep table and column statistics up to date 
for a table,
+      sometimes for a very large table or one that is updated frequently, the 
length of time to recompute
+      all the statistics might make it impractical to run those statements as 
often as needed.
+      As a workaround, you can use the <codeph>ALTER TABLE</codeph> statement 
to set table statistics
+      at the level of the entire table or a single partition, or column 
statistics at the level of
+      the entire table.
+    </p>
+
+    <p>
+      You can set the <codeph>numrows</codeph> value for table statistics by 
changing the
+      <codeph>TBLPROPERTIES</codeph> setting for a table or partition.
+      For example:
+<codeblock conref="../shared/impala_common.xml#common/set_numrows_example"/>
+<codeblock 
conref="../shared/impala_common.xml#common/set_numrows_partitioned_example"/>
+      See <xref href="impala_perf_stats.xml#perf_table_stats_manual"/> for 
details.
+    </p>
+
+    <p rev="2.6.0 IMPALA-3369">
+      In CDH 5.8 / Impala 2.6 and higher, you can use the <codeph>SET COLUMN 
STATS</codeph> clause
+      to set a specific stats value for a particular column.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/set_column_stats_example"/>
 
     <p>
       <b>To reorganize columns for a table:</b>
@@ -243,6 +403,36 @@ ALTER TABLE <varname>table_name</varname> DROP 
<varname>column_name</varname>;</
       to replace all the data before issuing any further queries.)
     </p>
 
+    <p rev="CDH-37178">
+      For example, here is how you might add columns to an existing table.
+      The first <codeph>ALTER TABLE</codeph> adds two new columns, and the 
second
+      <codeph>ALTER TABLE</codeph> adds one new column.
+      A single Impala query reads both the old and new data files, containing 
different numbers of columns.
+      For any columns not present in a particular data file, all the column 
values are
+      considered to be <codeph>NULL</codeph>.
+    </p>
+
+<codeblock rev="CDH-37178">
+create table t1 (x int);
+insert into t1 values (1), (2);
+
+alter table t1 add columns (s string, t timestamp);
+insert into t1 values (3, 'three', now());
+
+alter table t1 add columns (b boolean);
+insert into t1 values (4, 'four', now(), true);
+
+select * from t1 order by x;
++---+-------+-------------------------------+------+
+| x | s     | t                             | b    |
++---+-------+-------------------------------+------+
+| 1 | NULL  | NULL                          | NULL |
+| 2 | NULL  | NULL                          | NULL |
+| 3 | three | 2016-05-11 11:19:45.054457000 | NULL |
+| 4 | four  | 2016-05-11 11:20:20.260733000 | true |
++---+-------+-------------------------------+------+
+</codeblock>
+
     <p>
       You might use the <codeph>CHANGE</codeph> clause to rename a single 
column, or to treat an existing column as
       a different type than before, such as to switch between treating a 
column as <codeph>STRING</codeph> and
@@ -251,6 +441,138 @@ ALTER TABLE <varname>table_name</varname> DROP 
<varname>column_name</varname>;</
       define the new set of columns with a single <codeph>ALTER TABLE ... 
REPLACE COLUMNS</codeph> statement.
     </p>
 
+    <p rev="CDH-37178">
+      The following examples show some safe operations to drop or change 
columns. Dropping the final column
+      in a table lets Impala ignore the data without causing any disruption to 
existing data files. Changing the type
+      of a column works if existing data values can be safely converted to the 
new type. The type conversion
+      rules depend on the file format of the underlying table. For example, in 
a text table, the same value
+      can be interpreted as a <codeph>STRING</codeph> or a numeric value, 
while in a binary format such as
+      Parquet, the rules are stricter and type conversions only work between 
certain sizes of integers.
+    </p>
+
+<codeblock rev="CDH-37178">
+create table optional_columns (x int, y int, z int, a1 int, a2 int);
+insert into optional_columns values (1,2,3,0,0), (2,3,4,100,100);
+
+-- When the last column in the table is dropped, Impala ignores the
+-- values that are no longer needed. (Dropping A1 but leaving A2
+-- would cause problems, as we will see in a subsequent example.)
+alter table optional_columns drop column a2;
+alter table optional_columns drop column a1;
+
+select * from optional_columns;
++---+---+---+
+| x | y | z |
++---+---+---+
+| 1 | 2 | 3 |
+| 2 | 3 | 4 |
++---+---+---+
+</codeblock>
+
+<codeblock rev="CDH-37178">
+create table int_to_string (s string, x int);
+insert into int_to_string values ('one', 1), ('two', 2);
+
+-- What was an INT column will now be interpreted as STRING.
+-- This technique works for text tables but not other file formats.
+-- The second X represents the new name of the column, which we keep the same.
+alter table int_to_string change x x string;
+
+-- Once the type is changed, we can insert non-integer values into the X column
+-- and treat that column as a string, for example by uppercasing or 
concatenating.
+insert into int_to_string values ('three', 'trois');
+select s, upper(x) from int_to_string;
++-------+----------+
+| s     | upper(x) |
++-------+----------+
+| one   | 1        |
+| two   | 2        |
+| three | TROIS    |
++-------+----------+
+</codeblock>
+
+    <p rev="CDH-37178">
+      Remember that Impala does not actually do any conversion for the 
underlying data files as a result of
+      <codeph>ALTER TABLE</codeph> statements. If you use <codeph>ALTER 
TABLE</codeph> to create a table
+      layout that does not agree with the contents of the underlying files, 
you must replace the files
+      yourself, such as using <codeph>LOAD DATA</codeph> to load a new set of 
data files, or
+      <codeph>INSERT OVERWRITE</codeph> to copy from another table and replace 
the original data.
+    </p>
+
+    <p rev="CDH-37178">
+      The following example shows what happens if you delete the middle column 
from a Parquet table containing three columns.
+      The underlying data files still contain three columns of data. Because 
the columns are interpreted based on their positions in
+      the data file instead of the specific column names, a <codeph>SELECT 
*</codeph> query now reads the first and second
+      columns from the data file, potentially leading to unexpected results or 
conversion errors.
+      For this reason, if you expect to someday drop a column, declare it as 
the last column in the table, where its data
+      can be ignored by queries after the column is dropped. Or, re-run your 
ETL process and create new data files
+      if you drop or change the type of a column in a way that causes problems 
with existing data files.
+    </p>
+
+<codeblock rev="CDH-37178">
+-- Parquet table showing how dropping a column can produce unexpected results.
+create table p1 (s1 string, s2 string, s3 string) stored as parquet;
+
+insert into p1 values ('one', 'un', 'uno'), ('two', 'deux', 'dos'),
+  ('three', 'trois', 'tres');
+select * from p1;
++-------+-------+------+
+| s1    | s2    | s3   |
++-------+-------+------+
+| one   | un    | uno  |
+| two   | deux  | dos  |
+| three | trois | tres |
++-------+-------+------+
+
+alter table p1 drop column s2;
+-- The S3 column contains unexpected results.
+-- Because S2 and S3 have compatible types, the query reads
+-- values from the dropped S2, because the existing data files
+-- still contain those values as the second column.
+select * from p1;
++-------+-------+
+| s1    | s3    |
++-------+-------+
+| one   | un    |
+| two   | deux  |
+| three | trois |
++-------+-------+
+</codeblock>
+
+<codeblock rev="CDH-37178">
+-- Parquet table showing how dropping a column can produce conversion errors.
+create table p2 (s1 string, x int, s3 string) stored as parquet;
+
+insert into p2 values ('one', 1, 'uno'), ('two', 2, 'dos'), ('three', 3, 
'tres');
+select * from p2;
++-------+---+------+
+| s1    | x | s3   |
++-------+---+------+
+| one   | 1 | uno  |
+| two   | 2 | dos  |
+| three | 3 | tres |
++-------+---+------+
+
+alter table p2 drop column x;
+select * from p2;
+WARNINGS: 
+File '<varname>hdfs_filename</varname>' has an incompatible Parquet schema for 
column 'add_columns.p2.s3'.
+Column type: STRING, Parquet schema:
+optional int32 x [i:1 d:1 r:0]
+
+File '<varname>hdfs_filename</varname>' has an incompatible Parquet schema for 
column 'add_columns.p2.s3'.
+Column type: STRING, Parquet schema:
+optional int32 x [i:1 d:1 r:0]
+</codeblock>
+
+    <p rev="IMPALA-3092">
+      In CDH 5.8 / Impala 2.6 and higher, if an Avro table is created without 
column definitions in the
+      <codeph>CREATE TABLE</codeph> statement, and columns are later
+      added through <codeph>ALTER TABLE</codeph>, the resulting
+      table is now queryable. Missing values from the newly added
+      columns now default to <codeph>NULL</codeph>.
+    </p>
+
     <p>
       <b>To change the file format that Impala expects data to be in, for a 
table or partition:</b>
     </p>
@@ -308,21 +630,94 @@ alter table p1 partition (month=2, day=2) set fileformat 
parquet;
       Here is an example showing the <codeph>ADD PARTITION</codeph> and 
<codeph>DROP PARTITION</codeph> clauses.
     </p>
 
-    <p rev="2.3.0">
-      The optional <codeph>PURGE</codeph> keyword, available in CDH 5.5 / 
Impala 2.3 and higher,
-      is used with the <codeph>DROP PARTITION</codeph> clause to remove 
associated HDFS data files
-      immediately rather than going through the HDFS trashcan mechanism.
-      Use this keyword when dropping a partition if it is
-      crucial to remove the data as quickly as possible to free up space, or 
if there is a problem with
-      the trashcan, such as the trashcan not being configured or being in a 
different HDFS encryption zone
-      than the data files.
-    </p>
+    <p>
+      To avoid errors while adding or dropping partitions whose existence is 
not certain,
+      add the optional <codeph>IF [NOT] EXISTS</codeph> clause between the 
<codeph>ADD</codeph> or
+      <codeph>DROP</codeph> keyword and the <codeph>PARTITION</codeph> 
keyword. That is, the entire
+      clause becomes <codeph>ADD IF NOT EXISTS PARTITION</codeph> or 
<codeph>DROP IF EXISTS PARTITION</codeph>.
+      The following example shows how partitions can be created automatically 
through <codeph>INSERT</codeph>
+      statements, or manually through <codeph>ALTER TABLE</codeph> statements. 
The <codeph>IF [NOT] EXISTS</codeph>
+      clauses let the <codeph>ALTER TABLE</codeph> statements succeed even if 
a new requested partition already
+      exists, or a partition to be dropped does not exist.
+    </p>
+
+<p>
+Inserting 2 year values creates 2 partitions:
+</p>
+
+<codeblock>
+create table partition_t (s string) partitioned by (y int);
+insert into partition_t (s,y) values ('two thousand',2000), ('nineteen 
ninety',1990);
+show partitions partition_t;
++-------+-------+--------+------+--------------+-------------------+--------+-------------------+
+| y     | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | 
Incremental stats |
++-------+-------+--------+------+--------------+-------------------+--------+-------+
+| 1990  | -1    | 1      | 16B  | NOT CACHED   | NOT CACHED        | TEXT   | 
false |
+| 2000  | -1    | 1      | 13B  | NOT CACHED   | NOT CACHED        | TEXT   | 
false |
+| Total | -1    | 2      | 29B  | 0B           |                   |        |  
     |
++-------+-------+--------+------+--------------+-------------------+--------+-------+
+</codeblock>
+
+<p>
+Without the <codeph>IF NOT EXISTS</codeph> clause, an attempt to add a new 
partition might fail:
+</p>
+
+<codeblock>
+alter table partition_t add partition (y=2000);
+ERROR: AnalysisException: Partition spec already exists: (y=2000).
+</codeblock>
+
+<p>
+The <codeph>IF NOT EXISTS</codeph> clause makes the statement succeed whether 
or not there was already a
+partition with the specified key value:
+</p>
+
+<codeblock>
+alter table partition_t add if not exists partition (y=2000);
+alter table partition_t add if not exists partition (y=2010);
+show partitions partition_t;
++-------+-------+--------+------+--------------+-------------------+--------+-------------------+
+| y     | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | 
Incremental stats |
++-------+-------+--------+------+--------------+-------------------+--------+-------+
+| 1990  | -1    | 1      | 16B  | NOT CACHED   | NOT CACHED        | TEXT   | 
false |
+| 2000  | -1    | 1      | 13B  | NOT CACHED   | NOT CACHED        | TEXT   | 
false |
+| 2010  | -1    | 0      | 0B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false |
+| Total | -1    | 2      | 29B  | 0B           |                   |        |  
     |
++-------+-------+--------+------+--------------+-------------------+--------+-------+
+</codeblock>
+
+<p>
+Likewise, the <codeph>IF EXISTS</codeph> clause lets <codeph>DROP 
PARTITION</codeph> succeed whether or not the partition is already
+in the table:
+</p>
+
+<codeblock>
+alter table partition_t drop if exists partition (y=2000);
+alter table partition_t drop if exists partition (y=1950);
+show partitions partition_t;
++-------+-------+--------+------+--------------+-------------------+--------+-------------------+
+| y     | #Rows | #Files | Size | Bytes Cached | Cache Replication | Format | 
Incremental stats |
++-------+-------+--------+------+--------------+-------------------+--------+-------+
+| 1990  | -1    | 1      | 16B  | NOT CACHED   | NOT CACHED        | TEXT   | 
false |
+| 2010  | -1    | 0      | 0B   | NOT CACHED   | NOT CACHED        | TEXT   | 
false |
+| Total | -1    | 1      | 16B  | 0B           |                   |        |  
     |
++-------+-------+--------+------+--------------+-------------------+--------+-------+
+</codeblock>
 
-    <draft-comment translate="no">
-        Make example more general by partitioning by year/month/day.
+    <p rev="2.3.0"> The optional <codeph>PURGE</codeph> keyword, available in
+      CDH 5.5 / Impala 2.3 and higher, is used with the <codeph>DROP
+        PARTITION</codeph> clause to remove associated HDFS data files
+      immediately rather than going through the HDFS trashcan mechanism. Use
+      this keyword when dropping a partition if it is crucial to remove the 
data
+      as quickly as possible to free up space, or if there is a problem with 
the
+      trashcan, such as the trashcan not being configured or being in a
+      different HDFS encryption zone than the data files. </p>
+
+    <!--
+        To do: Make example more general by partitioning by year/month/day.
         Then could show inserting into fixed year, variable month and day;
         dropping particular year/month/day partition.
-      </draft-comment>
+    -->
 
 <codeblock>-- Create an empty table and define the partitioning scheme.
 create table part_t (x int) partitioned by (month int);

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_alter_view.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_alter_view.xml 
b/docs/topics/impala_alter_view.xml
index 0d83032..baf539d 100644
--- a/docs/topics/impala_alter_view.xml
+++ b/docs/topics/impala_alter_view.xml
@@ -3,7 +3,7 @@
 <concept rev="1.1" id="alter_view">
 
   <title>ALTER VIEW Statement</title>
-  <titlealts><navtitle>ALTER VIEW</navtitle></titlealts>
+  <titlealts audience="PDF"><navtitle>ALTER VIEW</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
@@ -11,6 +11,9 @@
       <data name="Category" value="DDL"/>
       <data name="Category" value="Tables"/>
       <data name="Category" value="Schemas"/>
+      <data name="Category" value="Views"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
     </metadata>
   </prolog>
 
@@ -18,9 +21,19 @@
 
     <p>
       <indexterm audience="Cloudera">ALTER VIEW statement</indexterm>
-      Changes the query associated with a view, or the associated database 
and/or name of the view.
+      Changes the characteristics of a view. The syntax has two forms:
     </p>
 
+    <ul>
+      <li>
+        The <codeph>AS</codeph> clause associates the view with a different 
query.
+      </li>
+      <li>
+        The <codeph>RENAME TO</codeph> clause changes the name of the view, 
moves the view to
+        a different database, or both.
+      </li>
+    </ul>
+
     <p>
       Because a view is purely a logical construct (an alias for a query) with 
no physical data behind it,
       <codeph>ALTER VIEW</codeph> only involves changes to metadata in the 
metastore database, not any data files

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_analytic_functions.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_analytic_functions.xml 
b/docs/topics/impala_analytic_functions.xml
index 293a512..7fc9b3c 100644
--- a/docs/topics/impala_analytic_functions.xml
+++ b/docs/topics/impala_analytic_functions.xml
@@ -4,7 +4,7 @@
 
   <title>Impala Analytic Functions</title>
 
-  <titlealts>
+  <titlealts audience="PDF">
 
     <navtitle>Analytic Functions</navtitle>
 
@@ -25,7 +25,7 @@
 
   <conbody>
 
-    <p>
+    <p rev="2.0.0">
       <indexterm audience="Cloudera">analytic functions</indexterm>
 
       <indexterm audience="Cloudera">window functions</indexterm>
@@ -155,9 +155,12 @@ window_clause: See <xref href="#window_clause">Window 
Clause</xref>
         outermost block of a query. It divides the rows into groups containing 
identical values in one or more
         columns. These logical groups are known as <term>partitions</term>. 
Throughout the discussion of analytic
         functions, <q>partitions</q> refers to the groups produced by the 
<codeph>PARTITION BY</codeph> clause, not
-        to partitioned tables.
+        to partitioned tables. However, note the following limitation that 
applies specifically to analytic function
+        calls involving partitioned tables.
       </p>
 
+      <p 
conref="../shared/impala_common.xml#common/analytic_partition_pruning_caveat"/>
+
       <p>
         The sequence of results from an analytic function <q>resets</q> for 
each new partition in the result set.
         That is, the set of preceding or following rows considered by the 
analytic function always come from a
@@ -209,10 +212,6 @@ window_clause: See <xref href="#window_clause">Window 
Clause</xref>
         <xref href="impala_analytic_functions.xml#window_clause"/> for full 
details.
       </p>
 
-<!--
-      <p conref="/Content/impala_common_xi44078.xml#common/usage_notes_blurb"/>
--->
-
       <p conref="../shared/impala_common.xml#common/hbase_blurb"/>
 
       <p>
@@ -334,8 +333,6 @@ The size of the window varies depending on the order and 
distribution of values.
 </p>
 -->
 
-<!--      <p 
conref="/Content/impala_common_xi44078.xml#common/restrictions_blurb"/> -->
-
       <p>
         Currently, Impala supports only some combinations of arguments to the 
<codeph>RANGE</codeph> clause:
       </p>
@@ -447,7 +444,7 @@ output as above. Because <codeph>RANGE</codeph> currently 
does not support numer
 
   <concept id="avg_analytic">
 
-    <title>AVG() Function - Analytic Context</title>
+    <title>AVG Function - Analytic Context</title>
 
     <conbody>
 
@@ -462,7 +459,7 @@ output as above. Because <codeph>RANGE</codeph> currently 
does not support numer
 
   <concept id="count_analytic">
 
-    <title>COUNT() Function - Analytic Context</title>
+    <title>COUNT Function - Analytic Context</title>
 
     <conbody>
 
@@ -477,7 +474,7 @@ output as above. Because <codeph>RANGE</codeph> currently 
does not support numer
 
   <concept rev="2.3.0" id="cume_dist">
 
-    <title>CUME_DIST() Function (CDH 5.5 or higher only)</title>
+    <title>CUME_DIST Function (CDH 5.5 or higher only)</title>
 
     <conbody>
 
@@ -647,7 +644,7 @@ order by kind, ordering desc, name;
 
   <concept rev="2.0.0" id="dense_rank">
 
-    <title>DENSE_RANK() Function</title>
+    <title>DENSE_RANK Function</title>
 
     <conbody>
 
@@ -832,7 +829,7 @@ order by kind, ordering desc, name;
 
   <concept rev="2.0.0" id="first_value">
 
-    <title>FIRST_VALUE() Function</title>
+    <title>FIRST_VALUE Function</title>
 
     <conbody>
 
@@ -929,7 +926,7 @@ select country, name,
 
   <concept rev="2.0.0" id="lag">
 
-    <title>LAG() Function</title>
+    <title>LAG Function</title>
 
     <conbody>
 
@@ -1026,7 +1023,7 @@ select country, name,
 
   <concept rev="2.0.0" id="last_value">
 
-    <title>LAST_VALUE() Function</title>
+    <title>LAST_VALUE Function</title>
 
     <conbody>
 
@@ -1095,7 +1092,7 @@ select country, name,
 
   <concept rev="2.0.0" id="lead">
 
-    <title>LEAD() Function</title>
+    <title>LEAD Function</title>
 
     <conbody>
 
@@ -1167,7 +1164,7 @@ from stock_ticker
 
   <concept id="max_analytic">
 
-    <title>MAX() Function - Analytic Context</title>
+    <title>MAX Function - Analytic Context</title>
 
     <conbody>
 
@@ -1182,7 +1179,7 @@ from stock_ticker
 
   <concept id="min_analytic">
 
-    <title>MIN() Function - Analytic Context</title>
+    <title>MIN Function - Analytic Context</title>
 
     <conbody>
 
@@ -1197,7 +1194,7 @@ from stock_ticker
 
   <concept audience="Cloudera" rev="2.x.x" id="nth_value">
 
-    <title>NTH_VALUE() Function</title>
+    <title>NTH_VALUE Function</title>
 
     <conbody>
 
@@ -1213,7 +1210,7 @@ from stock_ticker
 
   <concept rev="2.3.0" id="ntile">
 
-    <title>NTILE() Function (CDH 5.5 or higher only)</title>
+    <title>NTILE Function (CDH 5.5 or higher only)</title>
 
     <conbody>
 
@@ -1348,7 +1345,7 @@ order by kilos desc;
 
   <concept rev="2.3.0" id="percent_rank">
 
-    <title>PERCENT_RANK() Function (CDH 5.5 or higher only)</title>
+    <title>PERCENT_RANK Function (CDH 5.5 or higher only)</title>
 
     <conbody>
 
@@ -1436,7 +1433,7 @@ insert into animals values ('Fire-breathing dragon', 
'Mythical', NULL);
 
   <concept rev="2.0.0" id="rank">
 
-    <title>RANK() Function</title>
+    <title>RANK Function</title>
 
     <conbody>
 
@@ -1618,7 +1615,7 @@ insert into animals values ('Fire-breathing dragon', 
'Mythical', NULL);
 
   <concept rev="2.0.0" id="row_number">
 
-    <title>ROW_NUMBER() Function</title>
+    <title>ROW_NUMBER Function</title>
 
     <conbody>
 
@@ -1726,7 +1723,7 @@ insert into animals values ('Fire-breathing dragon', 
'Mythical', NULL);
 
   <concept id="sum_analytic">
 
-    <title>SUM() Function - Analytic Context</title>
+    <title>SUM Function - Analytic Context</title>
 
     <conbody>
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_appx_count_distinct.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_appx_count_distinct.xml 
b/docs/topics/impala_appx_count_distinct.xml
index 31a9679..854bbd0 100644
--- a/docs/topics/impala_appx_count_distinct.xml
+++ b/docs/topics/impala_appx_count_distinct.xml
@@ -2,17 +2,21 @@
 <!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
 <concept rev="2.0.0" id="appx_count_distinct">
 
-  <title>APPX_COUNT_DISTINCT Query Option</title>
+  <title>APPX_COUNT_DISTINCT Query Option (CDH 5.2 or higher only)</title>
+  <titlealts 
audience="PDF"><navtitle>APPX_COUNT_DISTINCT</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
       <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Aggregate Functions"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
     </metadata>
   </prolog>
 
   <conbody>
 
-    <p>
+    <p rev="2.0.0">
       <indexterm audience="Cloudera">APPX_COUNT_DISTINCT query 
option</indexterm>
       Allows multiple <codeph>COUNT(DISTINCT)</codeph> operations within a 
single query, by internally rewriting
       each <codeph>COUNT(DISTINCT)</codeph> to use the <codeph>NDV()</codeph> 
function. The resulting count is

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_appx_median.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_appx_median.xml 
b/docs/topics/impala_appx_median.xml
index d874ead..b97f6e3 100644
--- a/docs/topics/impala_appx_median.xml
+++ b/docs/topics/impala_appx_median.xml
@@ -3,11 +3,13 @@
 <concept rev="1.2.1" id="appx_median">
 
   <title>APPX_MEDIAN Function</title>
-  <titlealts><navtitle>APPX_MEDIAN</navtitle></titlealts>
+  <titlealts audience="PDF"><navtitle>APPX_MEDIAN</navtitle></titlealts>
   <prolog>
     <metadata>
       <data name="Category" value="Impala"/>
       <data name="Category" value="SQL"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
       <data name="Category" value="Impala Functions"/>
       <data name="Category" value="Aggregate Functions"/>
       <data name="Category" value="Querying"/>
@@ -48,7 +50,7 @@
       averaging.
     </p>
 
-<!-- <p 
conref="/Content/impala_common_xi44078.xml#common/restrictions_sliding_window"/>
 -->
+<!-- <p 
conref="../shared/impala_common.xml#common/restrictions_sliding_window"/> -->
 
     <p conref="../shared/impala_common.xml#common/restrictions_blurb"/>
 

http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/3c2c8f12/docs/topics/impala_array.xml
----------------------------------------------------------------------
diff --git a/docs/topics/impala_array.xml b/docs/topics/impala_array.xml
index 1e60795..4cab7f6 100644
--- a/docs/topics/impala_array.xml
+++ b/docs/topics/impala_array.xml
@@ -8,6 +8,9 @@
     <metadata>
       <data name="Category" value="Impala"/>
       <data name="Category" value="Impala Data Types"/>
+      <data name="Category" value="SQL"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
     </metadata>
   </prolog>
 


Reply via email to