http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_abort_on_default_limit_exceeded.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_abort_on_default_limit_exceeded.xml b/docs/topics/impala_abort_on_default_limit_exceeded.xml new file mode 100644 index 0000000..c58be63 --- /dev/null +++ b/docs/topics/impala_abort_on_default_limit_exceeded.xml @@ -0,0 +1,20 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="obwl" id="abort_on_default_limit_exceeded"> + + <title>ABORT_ON_DEFAULT_LIMIT_EXCEEDED Query Option</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + </metadata> + </prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/obwl_query_options"/> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + </conbody> +</concept>
http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_abort_on_error.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_abort_on_error.xml b/docs/topics/impala_abort_on_error.xml new file mode 100644 index 0000000..1926333 --- /dev/null +++ b/docs/topics/impala_abort_on_error.xml @@ -0,0 +1,40 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="abort_on_error"> + + <title>ABORT_ON_ERROR Query Option</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Troubleshooting"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">ABORT_ON_ERROR query option</indexterm> + When this option is enabled, Impala cancels a query immediately when any of the nodes encounters an error, + rather than continuing and possibly returning incomplete results. This option is disabled by default, to help + gather maximum diagnostic information when an error occurs, for example, whether the same problem occurred on + all nodes or only a single node. Currently, the errors that Impala can skip over involve data corruption, + such as a column that contains a string value when expected to contain an integer value. + </p> + + <p> + To control how much logging Impala does for non-fatal errors when <codeph>ABORT_ON_ERROR</codeph> is turned + off, use the <codeph>MAX_ERRORS</codeph> option. + </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + <p> + <xref href="impala_max_errors.xml#max_errors"/>, + <xref href="impala_logging.xml#logging"/> + </p> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_aggregate_functions.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_aggregate_functions.xml b/docs/topics/impala_aggregate_functions.xml new file mode 100644 index 0000000..5095266 --- /dev/null +++ b/docs/topics/impala_aggregate_functions.xml @@ -0,0 +1,33 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="aggregate_functions"> + + <title>Impala Aggregate Functions</title> + <titlealts><navtitle>Aggregate Functions</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Functions"/> + <data name="Category" value="Aggregate Functions"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Querying"/> + </metadata> + </prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/aggr1"/> + +<codeblock conref="../shared/impala_common.xml#common/aggr2"/> + + <p conref="../shared/impala_common.xml#common/aggr3"/> + + <p> + <indexterm audience="Cloudera">aggregate functions</indexterm> + </p> + + <p outputclass="toc"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_aliases.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_aliases.xml b/docs/topics/impala_aliases.xml new file mode 
100644 index 0000000..66a16fe --- /dev/null +++ b/docs/topics/impala_aliases.xml @@ -0,0 +1,71 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="aliases"> + + <title>Overview of Impala Aliases</title> + <titlealts><navtitle>Aliases</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + When you write the names of tables, columns, or column expressions in a query, you can assign an alias at the + same time. Then you can specify the alias rather than the original name when making other references to the + table or column in the same statement. You typically specify aliases that are shorter or easier to remember + (or both) than the original names. The aliases are printed in the query header, making them useful for + self-documenting output. + </p> + + <p> + To set up an alias, add the <codeph>AS <varname>alias</varname></codeph> clause immediately after any table, + column, or expression name in the <codeph>SELECT</codeph> list or <codeph>FROM</codeph> list of a query. The + <codeph>AS</codeph> keyword is optional; you can also specify the alias immediately after the original name. + </p> + + <p> + To use an alias name that matches one of the Impala reserved keywords (listed in + <xref href="impala_reserved_words.xml#reserved_words"/>), surround the identifier with either single or + double quotation marks, or <codeph>``</codeph> characters (backticks). + </p> + +<codeblock>select c1 as name, c2 as address, c3 as phone from table_with_terse_columns; +select sum(ss_xyz_dollars_net) as total_sales from table_with_cryptic_columns; +select one.name, two.address, three.phone from + census one, building_directory two, phonebook three + where one.id = two.id and two.id = three.id;</codeblock> + + <p> + <ph conref="../shared/impala_common.xml#common/aliases_vs_identifiers"/> + </p> + + <p conref="../shared/impala_common.xml#common/complex_types_blurb"/> + + <p rev="2.3.0"> + Queries involving the complex types (<codeph>ARRAY</codeph>, + <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>) typically make + extensive use of table aliases. These queries involve join clauses + where the complex type column is treated as a joined table. + To construct two-part or three-part qualified names for the + complex column elements in the <codeph>FROM</codeph> list, + it is sometimes syntactically necessary to define a table + alias for the complex column where it is referenced in the join clause. + See <xref href="impala_complex_types.xml#complex_types"/> for details and examples. + </p>
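+ + <p rev="2.3.0"> + For example, in the following sketch, the table <codeph>customers</codeph> and its <codeph>ARRAY</codeph> column <codeph>phone_numbers</codeph> are hypothetical names used only for illustration. The alias <codeph>c</codeph> qualifies the complex column in the <codeph>FROM</codeph> list, and the alias <codeph>p</codeph> refers to the unpacked array elements in the rest of the query: + </p> + +<codeblock>-- Hypothetical table with a column declared as ARRAY&lt;STRING&gt;. +select c.name, p.item + from customers c, c.phone_numbers p;</codeblock>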
+ + <p> + <b>Alternatives:</b> + </p> + + <p conref="../shared/impala_common.xml#common/views_vs_identifiers"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_allow_unsupported_formats.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_allow_unsupported_formats.xml b/docs/topics/impala_allow_unsupported_formats.xml new file mode 100644 index 0000000..824daa4 --- /dev/null +++ b/docs/topics/impala_allow_unsupported_formats.xml @@ -0,0 +1,29 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="allow_unsupported_formats"> + + <title>ALLOW_UNSUPPORTED_FORMATS Query Option</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + </metadata> + </prolog> + + <conbody> + +<!-- +The original brief explanation with not enough detail comes from the comments at: + http://github.sf.cloudera.com/CDH/Impala/raw/master/common/thrift/ImpalaService.thrift +Removing that wording from here after discussions with dev team. Just recording the URL for posterity. +--> + + <p> + An obsolete query option from early work on support for file formats. Do not use. Might be removed in the + future. + </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_alter_table.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_alter_table.xml b/docs/topics/impala_alter_table.xml new file mode 100644 index 0000000..800261a --- /dev/null +++ b/docs/topics/impala_alter_table.xml @@ -0,0 +1,411 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="alter_table"> + + <title>ALTER TABLE Statement</title> + <titlealts><navtitle>ALTER TABLE</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Data Types"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="HDFS Caching"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">ALTER TABLE statement</indexterm> + The <codeph>ALTER TABLE</codeph> statement changes the structure or properties of an existing table. In + Impala, this is a logical operation that updates the table metadata in the metastore database that Impala + shares with Hive; <codeph>ALTER TABLE</codeph> does not actually rewrite, move, or otherwise modify the data + files. Thus, you might need to perform corresponding physical filesystem operations, such as moving data + files to a different HDFS directory, rewriting the data files to include extra fields, or converting them to + a different file format.
+ </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>ALTER TABLE [<varname>old_db_name</varname>.]<varname>old_table_name</varname> RENAME TO [<varname>new_db_name</varname>.]<varname>new_table_name</varname> + +ALTER TABLE <varname>name</varname> ADD COLUMNS (<varname>col_spec</varname>[, <varname>col_spec</varname> ...]) +ALTER TABLE <varname>name</varname> DROP [COLUMN] <varname>column_name</varname> +ALTER TABLE <varname>name</varname> CHANGE <varname>column_name</varname> <varname>new_name</varname> <varname>new_type</varname> +ALTER TABLE <varname>name</varname> REPLACE COLUMNS (<varname>col_spec</varname>[, <varname>col_spec</varname> ...]) + +ALTER TABLE <varname>name</varname> { ADD | DROP } PARTITION (<varname>partition_spec</varname>) <ph rev="2.3.0">[PURGE]</ph> + +ALTER TABLE <varname>name</varname> [PARTITION (<varname>partition_spec</varname>)] + SET { FILEFORMAT <varname>file_format</varname> + | LOCATION '<varname>hdfs_path_of_directory</varname>' + | TBLPROPERTIES (<varname>table_properties</varname>) + | SERDEPROPERTIES (<varname>serde_properties</varname>) } + +<ph rev="1.4.0">ALTER TABLE <varname>name</varname> [PARTITION (<varname>partition_spec</varname>)] SET { CACHED IN '<varname>pool_name</varname>' <ph rev="2.2.0">[WITH REPLICATION = <varname>integer</varname>]</ph> | UNCACHED }</ph> + +<varname>new_name</varname> ::= [<varname>new_database</varname>.]<varname>new_table_name</varname> + +<varname>col_spec</varname> ::= <varname>col_name</varname> <varname>type_name</varname> + +<varname>partition_spec</varname> ::= <varname>partition_col</varname>=<varname>constant_value</varname> + +<varname>table_properties</varname> ::= '<varname>name</varname>'='<varname>value</varname>'[, '<varname>name</varname>'='<varname>value</varname>' ...] + +<varname>serde_properties</varname> ::= '<varname>name</varname>'='<varname>value</varname>'[, '<varname>name</varname>'='<varname>value</varname>' ...] + +<varname>file_format</varname> ::= { PARQUET | TEXTFILE | RCFILE | SEQUENCEFILE | AVRO } +</codeblock> + + <p conref="../shared/impala_common.xml#common/ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/complex_types_blurb"/> + + <p rev="2.3.0"> + In CDH 5.5 / Impala 2.3 and higher, the <codeph>ALTER TABLE</codeph> statement can + change the metadata for tables containing complex types (<codeph>ARRAY</codeph>, + <codeph>STRUCT</codeph>, and <codeph>MAP</codeph>). + For example, you can use an <codeph>ADD COLUMNS</codeph>, <codeph>DROP COLUMN</codeph>, or <codeph>CHANGE</codeph> + clause to modify the table layout for complex type columns. + Although Impala queries only work for complex type columns in Parquet tables, the complex type support in the + <codeph>ALTER TABLE</codeph> statement applies to all file formats. + For example, you can use Impala to update metadata for a staging table in a non-Parquet file format where the + data is populated by Hive. Or you can use <codeph>ALTER TABLE SET FILEFORMAT</codeph> to change the format + of an existing table to Parquet so that Impala can query it. (Remember that changing the file format for a table does + not convert the data files within the table; you must prepare any Parquet data files containing complex types + outside Impala, and bring them into the table using <codeph>LOAD DATA</codeph> or by updating the table's + <codeph>LOCATION</codeph> property.) + See <xref href="impala_complex_types.xml#complex_types"/> for details about using complex types. + </p>
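+ + <p rev="2.3.0"> + As an illustration, the following sketch uses hypothetical table and column names to show a metadata-only change: adding a complex type column to a staging table, then switching the table to the Parquet file format. + </p> + +<codeblock>-- Hypothetical staging table originally created without any complex type columns. +alter table staging_contacts add columns (phone_numbers array&lt;string&gt;); +-- Metadata-only change; existing data files are not converted to Parquet. +alter table staging_contacts set fileformat parquet;</codeblock>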
+ + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + Whenever you specify partitions in an <codeph>ALTER TABLE</codeph> statement, through the <codeph>PARTITION + (<varname>partition_spec</varname>)</codeph> clause, you must include all the partitioning columns in the + specification. + </p> + + <p> + Most of the <codeph>ALTER TABLE</codeph> operations work the same for internal tables (managed by Impala) as + for external tables (with data files located in arbitrary locations). The exception is renaming a table; for + an external table, the underlying data directory is not renamed or moved. + </p> + + <p conref="../shared/impala_common.xml#common/s3_blurb"/> + <p rev="2.2.0"> + You can specify an <codeph>s3a://</codeph> prefix in the <codeph>LOCATION</codeph> attribute of a table or partition + to make Impala query data from the Amazon S3 filesystem. + See <xref href="impala_s3.xml#s3"/> for details. + </p> + + <p rev="1.4.0"> + <b>HDFS caching (CACHED IN clause):</b> + </p> + + <p rev="1.4.0"> + If you specify the <codeph>CACHED IN</codeph> clause, any existing or future data files in the table + directory or the partition subdirectories are designated to be loaded into memory with the HDFS caching + mechanism. See <xref href="impala_perf_hdfs_caching.xml#hdfs_caching"/> for details about using the HDFS + caching feature. + </p> + + <p conref="../shared/impala_common.xml#common/impala_cache_replication_factor"/> + + <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/> + + <p> + The following sections show examples of the use cases for various <codeph>ALTER TABLE</codeph> clauses. + </p> + + <p> + <b>To rename a table (RENAME TO clause):</b> + </p> + +<!-- Beefing up the syntax in its original location up top, don't need to repeat it here. +<codeblock>ALTER TABLE <varname>old_name</varname> RENAME TO <varname>new_name</varname>;</codeblock> +--> + + <p> + The <codeph>RENAME TO</codeph> clause lets you change the name of an existing table, and optionally which + database it is located in. + </p> + + <p> + For internal tables, this operation physically renames the directory within HDFS that contains the data files; + the original directory name no longer exists. By qualifying the table names with database names, you can use + this technique to move an internal table (and its associated data directory) from one database to another. + For example: + </p> + +<codeblock>create database d1; +create database d2; +create database d3; +use d1; +create table mobile (x int); +use d2; +-- Move table from another database to the current one. +alter table d1.mobile rename to mobile; +use d1; +-- Move table from one database to another. +alter table d2.mobile rename to d3.mobile;</codeblock> + + <p> + For external tables, <codeph>RENAME TO</codeph> changes only the table name recorded in the metastore + database; the underlying data directory is not renamed or moved. + </p> + + <p> + <b>To change the physical location where Impala looks for data files associated with a table or + partition:</b> + </p> + +<codeblock>ALTER TABLE <varname>table_name</varname> [PARTITION (<varname>partition_spec</varname>)] SET LOCATION '<varname>hdfs_path_of_directory</varname>';</codeblock> + + <p> + The path you specify is the full HDFS path where the data files reside, or will be created. Impala does not + create any additional subdirectory named after the table. Impala does not move any data files to this new + location or change any data files that might already exist in that directory. + </p> + + <p> + To set the location for a single partition, include the <codeph>PARTITION</codeph> clause.
Specify all the + same partitioning columns for the table, with a constant value for each, to precisely identify the single + partition affected by the statement: + </p> + +<codeblock>create table p1 (s string) partitioned by (month int, day int); +-- Each ADD PARTITION clause creates a subdirectory in HDFS. +alter table p1 add partition (month=1, day=1); +alter table p1 add partition (month=1, day=2); +alter table p1 add partition (month=2, day=1); +alter table p1 add partition (month=2, day=2); +-- Redirect queries, INSERT, and LOAD DATA for one partition +-- to a specific different directory. +alter table p1 partition (month=1, day=1) set location '/usr/external_data/new_years_day'; +</codeblock> + + <note conref="../shared/impala_common.xml#common/add_partition_set_location"/> + + <p rev="1.2"> + <b>To change the key-value pairs of the TBLPROPERTIES and SERDEPROPERTIES fields:</b> + </p> + +<codeblock>ALTER TABLE <varname>table_name</varname> SET TBLPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>'[, ...]); +ALTER TABLE <varname>table_name</varname> SET SERDEPROPERTIES ('<varname>key1</varname>'='<varname>value1</varname>', '<varname>key2</varname>'='<varname>value2</varname>'[, ...]);</codeblock> + + <p> + The <codeph>TBLPROPERTIES</codeph> clause is primarily a way to associate arbitrary user-specified data items + with a particular table. + </p> + + <p> + The <codeph>SERDEPROPERTIES</codeph> clause sets up metadata defining how tables are read or written, needed + in some cases by Hive but not used extensively by Impala. You would use this clause primarily to change the + delimiter in an existing text table or partition, by setting the <codeph>'serialization.format'</codeph> and + <codeph>'field.delim'</codeph> property values to the new delimiter character: + </p> + +<codeblock>-- This table begins life as pipe-separated text format. +create table change_to_csv (s1 string, s2 string) row format delimited fields terminated by '|'; +-- Then we change it to a CSV table. +alter table change_to_csv set SERDEPROPERTIES ('serialization.format'=',', 'field.delim'=','); +insert overwrite change_to_csv values ('stop','go'), ('yes','no'); +!hdfs dfs -cat 'hdfs://<varname>hostname</varname>:8020/<varname>data_directory</varname>/<varname>dbname</varname>.db/change_to_csv/<varname>data_file</varname>'; +stop,go +yes,no</codeblock> + + <p> + Use the <codeph>DESCRIBE FORMATTED</codeph> statement to see the current values of these properties for an + existing table. See <xref href="impala_create_table.xml#create_table"/> for more details about these clauses. + See <xref href="impala_perf_stats.xml#perf_stats_manual"/> for an example of using table properties to + fine-tune the performance-related table statistics. + </p> + + <p> + <b>To reorganize columns for a table:</b> + </p> + +<codeblock>ALTER TABLE <varname>table_name</varname> ADD COLUMNS (<varname>column_defs</varname>); +ALTER TABLE <varname>table_name</varname> REPLACE COLUMNS (<varname>column_defs</varname>); +ALTER TABLE <varname>table_name</varname> CHANGE <varname>column_name</varname> <varname>new_name</varname> <varname>new_type</varname>; +ALTER TABLE <varname>table_name</varname> DROP <varname>column_name</varname>;</codeblock> + + <p> + The <varname>column_spec</varname> is the same as in the <codeph>CREATE TABLE</codeph> statement: the column + name, then its data type, then an optional comment. You can add multiple columns at a time. 
The parentheses + are required whether you add a single column or multiple columns. When you replace columns, all the original + column definitions are discarded. You might use this technique if you receive a new set of data files with + different data types or columns in a different order. (The data files are retained, so if the new columns are + incompatible with the old ones, use <codeph>INSERT OVERWRITE</codeph> or <codeph>LOAD DATA OVERWRITE</codeph> + to replace all the data before issuing any further queries.) + </p> + + <p> + You might use the <codeph>CHANGE</codeph> clause to rename a single column, or to treat an existing column as + a different type than before, such as to switch between treating a column as <codeph>STRING</codeph> and + <codeph>TIMESTAMP</codeph>, or between <codeph>INT</codeph> and <codeph>BIGINT</codeph>. You can only drop a + single column at a time; to drop multiple columns, issue multiple <codeph>ALTER TABLE</codeph> statements, or + define the new set of columns with a single <codeph>ALTER TABLE ... REPLACE COLUMNS</codeph> statement. + </p> + + <p> + <b>To change the file format that Impala expects data to be in, for a table or partition:</b> + </p> + + <p> + Use an <codeph>ALTER TABLE ... SET FILEFORMAT</codeph> clause. You can include an optional <codeph>PARTITION + (<varname>col1</varname>=<varname>val1</varname>, <varname>col2</varname>=<varname>val2</varname>, + ...)</codeph> clause so that the file format is changed for a specific partition rather than the entire table. + </p> + + <p> + Because this operation only changes the table metadata, you must do any conversion of existing data using + regular Hadoop techniques outside of Impala. Any new data created by the Impala <codeph>INSERT</codeph> + statement will be in the new format. You cannot specify the delimiter for Text files; the data files must be + comma-delimited. +<!-- Although Impala can read Avro tables + created through Hive, you cannot specify the Avro file format in an Impala + <codeph>ALTER TABLE</codeph> statement. --> + </p> + + <p> + To set the file format for a single partition, include the <codeph>PARTITION</codeph> clause. Specify all the + same partitioning columns for the table, with a constant value for each, to precisely identify the single + partition affected by the statement: + </p> + +<codeblock>create table p1 (s string) partitioned by (month int, day int); +-- Each ADD PARTITION clause creates a subdirectory in HDFS. +alter table p1 add partition (month=1, day=1); +alter table p1 add partition (month=1, day=2); +alter table p1 add partition (month=2, day=1); +alter table p1 add partition (month=2, day=2); +-- Queries and INSERT statements will read and write files +-- in this format for this specific partition. +alter table p1 partition (month=2, day=2) set fileformat parquet; +</codeblock> + + <p> + <b>To add or drop partitions for a table</b>, the table must already be partitioned (that is, created with a + <codeph>PARTITIONED BY</codeph> clause). The partition is a physical directory in HDFS, with a name that + encodes a particular column value (the <b>partition key</b>). The Impala <codeph>INSERT</codeph> statement + already creates the partition if necessary, so the <codeph>ALTER TABLE ... ADD PARTITION</codeph> is + primarily useful for importing data by moving or copying existing data files into the HDFS directory + corresponding to a partition. (You can use the <codeph>LOAD DATA</codeph> statement to move files into the + partition directory, or <codeph>ALTER TABLE ... PARTITION (...) SET LOCATION</codeph> to point a partition at + a directory that already contains data files.) + </p>
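+ + <p> + For instance, the following sketch (with a hypothetical table name and HDFS path) adds a partition and then points it at a directory that already contains data files: + </p> + +<codeblock>-- Hypothetical table partitioned by year and month. +alter table logs add partition (year=2015, month=12); +alter table logs partition (year=2015, month=12) set location '/user/etl/staging/logs/2015/12';</codeblock>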
+ + <p> + The <codeph>DROP PARTITION</codeph> clause is used to remove the HDFS directory and associated data files for + a particular set of partition key values; for example, if you always analyze the last 3 months' worth of data, + at the beginning of each month you might drop the oldest partition that is no longer needed. Removing + partitions reduces the amount of metadata associated with the table and the complexity of calculating the + optimal query plan, which can simplify and speed up queries on partitioned tables, particularly join queries. + Here is an example showing the <codeph>ADD PARTITION</codeph> and <codeph>DROP PARTITION</codeph> clauses. + </p> + + <p rev="2.3.0"> + The optional <codeph>PURGE</codeph> keyword, available in CDH 5.5 / Impala 2.3 and higher, + is used with the <codeph>DROP PARTITION</codeph> clause to remove associated HDFS data files + immediately rather than going through the HDFS trashcan mechanism. + Use this keyword when dropping a partition if it is + crucial to remove the data as quickly as possible to free up space, or if there is a problem with + the trashcan, such as the trashcan not being configured or being in a different HDFS encryption zone + than the data files. + </p> + + <draft-comment translate="no"> + Make example more general by partitioning by year/month/day. + Then could show inserting into fixed year, variable month and day; + dropping particular year/month/day partition. + </draft-comment> + +<codeblock>-- Create an empty table and define the partitioning scheme. +create table part_t (x int) partitioned by (month int); +-- Create an empty partition into which you could copy data files from some other source. +alter table part_t add partition (month=1); +-- After changing the underlying data, issue a REFRESH statement to make the data visible in Impala. +refresh part_t; +-- Later, do the same for the next month. +alter table part_t add partition (month=2); + +-- Now you no longer need the older data. +alter table part_t drop partition (month=1); +-- If the table was partitioned by month and year, you would issue a statement like: +-- alter table part_t drop partition (year=2003,month=1); +-- which would require 12 ALTER TABLE statements to remove a year's worth of data. + +-- If the data files for subsequent months were in a different file format, +-- you could set a different file format for the new partition after adding it. +alter table part_t add partition (month=3); +alter table part_t partition (month=3) set fileformat parquet; +</codeblock> + + <p> + The value specified for a partition key can be an arbitrary constant expression, without any references to + columns. For example: + </p> + +<codeblock>alter table time_data add partition (month=concat('Decem','ber')); +alter table sales_data add partition (zipcode = cast(9021 * 10 as string));</codeblock> + + <note> + <p> + An alternative way to reorganize a table and its associated data files is to use <codeph>CREATE + TABLE</codeph> to create a variation of the original table, then use <codeph>INSERT</codeph> to copy the + transformed or reordered data to the new table.
The advantage of <codeph>ALTER TABLE</codeph> is that it + avoids making a duplicate copy of the data files, allowing you to reorganize huge volumes of data in a + space-efficient way using familiar Hadoop techniques. + </p> + </note> + + <p> + <b>To switch a table between internal and external:</b> + </p> + + <p conref="../shared/impala_common.xml#common/switch_internal_external_table"/> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb"/> + <p rev="CDH-19187"> + Most <codeph>ALTER TABLE</codeph> clauses do not actually + read or write any HDFS files, and so do not depend on + specific HDFS permissions. For example, the <codeph>SET FILEFORMAT</codeph> + clause does not actually check the file format of existing data files or + convert them to the new format, and the <codeph>SET LOCATION</codeph> clause + does not require any special permissions on the new location. + (Any permission-related failures would come later, when you + actually query or insert into the table.) + </p> +<!-- Haven't rigorously tested all the assertions in the following paragraph. --> +<!-- Most testing so far has been around RENAME TO clause. --> + <p> + In general, <codeph>ALTER TABLE</codeph> clauses that do touch + HDFS files and directories require the same HDFS permissions + as corresponding <codeph>CREATE</codeph>, <codeph>INSERT</codeph>, + or <codeph>SELECT</codeph> statements. + The permissions allow + the user ID that the <cmdname>impalad</cmdname> daemon runs under, + typically the <codeph>impala</codeph> user, to read or write + files or directories, or (in the case of the execute bit) to descend into a directory. + The <codeph>RENAME TO</codeph> clause requires read, write, and execute permission in the + source and destination database directories and in the table data directory, + and read and write permission for the data files within the table. + The <codeph>ADD PARTITION</codeph> and <codeph>DROP PARTITION</codeph> clauses + require write and execute permissions for the associated partition directory.
+ </p> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_tables.xml#tables"/>, + <xref href="impala_create_table.xml#create_table"/>, <xref href="impala_drop_table.xml#drop_table"/>, + <xref href="impala_partitioning.xml#partitioning"/>, <xref href="impala_tables.xml#internal_tables"/>, + <xref href="impala_tables.xml#external_tables"/> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/463ddf92/docs/topics/impala_alter_view.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_alter_view.xml b/docs/topics/impala_alter_view.xml new file mode 100644 index 0000000..0d83032 --- /dev/null +++ b/docs/topics/impala_alter_view.xml @@ -0,0 +1,73 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept rev="1.1" id="alter_view"> + + <title>ALTER VIEW Statement</title> + <titlealts><navtitle>ALTER VIEW</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Tables"/> + <data name="Category" value="Schemas"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">ALTER VIEW statement</indexterm> + Changes the query associated with a view, or the associated database and/or name of the view. + </p> + + <p> + Because a view is purely a logical construct (an alias for a query) with no physical data behind it, + <codeph>ALTER VIEW</codeph> only involves changes to metadata in the metastore database, not any data files + in HDFS. + </p> + +<!-- View _permissions_ don't rely on underlying table. --> + +<!-- Could use views to grant access only to certain columns. --> + +<!-- Treated like a table for authorization. --> + +<!-- ALTER VIEW that queries another view - possibly a runtime error. --> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock>ALTER VIEW [<varname>database_name</varname>.]<varname>view_name</varname> AS <varname>select_statement</varname> +ALTER VIEW [<varname>database_name</varname>.]<varname>view_name</varname> RENAME TO [<varname>database_name</varname>.]<varname>view_name</varname></codeblock> + + <p conref="../shared/impala_common.xml#common/ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/security_blurb"/> + <p conref="../shared/impala_common.xml#common/redaction_yes"/> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/permissions_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + +<codeblock>create table t1 (x int, y int, s string); +create table t2 like t1; +create view v1 as select * from t1; +alter view v1 as select * from t2; +alter view v1 as select x, upper(s) s from t2;</codeblock> + +<!-- Repeat the same blurb + example to see the definition of a view, as in CREATE VIEW. --> + + <p conref="../shared/impala_common.xml#common/describe_formatted_view"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + <xref href="impala_views.xml#views"/>, <xref href="impala_create_view.xml#create_view"/>, + <xref href="impala_drop_view.xml#drop_view"/> + </p> + </conbody> +</concept>
