Add files that weren't needed during initial build testing of SQL Reference.
Everything from main CDH doc repo with filename matching impala_*.xml. Not touching any files that were already there. This should just get us very close to having no dangling references. Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/bb88fdc0 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/bb88fdc0 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/bb88fdc0 Branch: refs/heads/doc_prototype Commit: bb88fdc0a78a866096cf1c78e1f8d57deee9e342 Parents: 4bf483c Author: John Russell <[email protected]> Authored: Sun Oct 30 22:51:29 2016 -0700 Committer: John Russell <[email protected]> Committed: Sun Oct 30 22:51:29 2016 -0700 ---------------------------------------------------------------------- docs/topics/impala.xml | 77 + docs/topics/impala_alter_function.xml | 21 + docs/topics/impala_breakpad.xml | 255 + docs/topics/impala_cdh.xml | 25 + docs/topics/impala_create_data_source.xml | 35 + docs/topics/impala_data_sources.xml | 22 + docs/topics/impala_date.xml | 104 + docs/topics/impala_disable_cached_reads.xml | 36 + docs/topics/impala_disable_outermost_topn.xml | 29 + .../impala_disable_row_runtime_filtering.xml | 65 + ...impala_disable_streaming_preaggregations.xml | 45 + docs/topics/impala_drop_data_source.xml | 35 + docs/topics/impala_errata.xml | 1378 ++++ docs/topics/impala_faq_base.xml | 24 + docs/topics/impala_features.xml | 21 + docs/topics/impala_fixed_issues.xml | 7076 ++++++++++++++++++ docs/topics/impala_glossary.xml | 834 +++ docs/topics/impala_howto_rm.xml | 420 ++ docs/topics/impala_known_issues.xml | 1812 +++++ docs/topics/impala_max_block_mgr_memory.xml | 30 + docs/topics/impala_max_num_runtime_filters.xml | 61 + .../impala_optimize_partition_key_scans.xml | 180 + .../impala_parquet_annotate_strings_utf8.xml | 50 + ...mpala_parquet_fallback_schema_resolution.xml | 49 + docs/topics/impala_perf_ddl.xml | 42 + 
docs/topics/impala_prefetch_mode.xml | 49 + docs/topics/impala_query_lifetime.xml | 31 + docs/topics/impala_relnotes.xml | 34 + docs/topics/impala_replica_preference.xml | 48 + docs/topics/impala_rm_initial_mem.xml | 29 + .../topics/impala_runtime_bloom_filter_size.xml | 93 + docs/topics/impala_runtime_filter_max_size.xml | 51 + docs/topics/impala_runtime_filter_min_size.xml | 51 + docs/topics/impala_runtime_filter_mode.xml | 77 + .../impala_runtime_filter_wait_time_ms.xml | 47 + docs/topics/impala_runtime_filtering.xml | 506 ++ docs/topics/impala_s3_skip_insert_staging.xml | 77 + .../impala_scan_node_codegen_threshold.xml | 75 + docs/topics/impala_schedule_random_replica.xml | 80 + docs/topics/impala_seq_compression_mode.xml | 29 + docs/topics/impala_trouble_bad_results.xml | 25 + docs/topics/impala_trouble_memory.xml | 25 + docs/topics/impala_trouble_query_fail.xml | 24 + docs/topics/impala_trouble_sql.xml | 25 + docs/topics/impala_trouble_startup.xml | 25 + docs/topics/impala_window_functions.xml | 23 + 46 files changed, 14150 insertions(+) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala.xml b/docs/topics/impala.xml new file mode 100644 index 0000000..8dc7b2a --- /dev/null +++ b/docs/topics/impala.xml @@ -0,0 +1,77 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="about_impala"> + + <title>Apache Impala (incubating) - Interactive SQL</title> + <titlealts audience="PDF"><navtitle>Impala Guide</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Components"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="SQL"/> + </metadata> + 
</prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/impala_mission_statement"/> + + <p conref="../shared/impala_common.xml#common/impala_hive_compatibility"/> + + <p conref="../shared/impala_common.xml#common/impala_advantages"/> + + <p outputclass="toc"/> + + <p audience="integrated"> + <b>Related information throughout the CDH 5 library:</b> + </p> + + <p audience="integrated"> + In CDH 5, the Impala documentation for Release Notes, Installation, Upgrading, and Security has been + integrated alongside the corresponding information for other Hadoop components: + </p> + +<!-- Same list is in impala.xml and Impala FAQs. Conref in both places. --> + + <ul> + <li> + <xref href="impala_new_features.xml#new_features">New features</xref> + </li> + + <li> + <xref href="impala_known_issues.xml#known_issues">Known and fixed issues</xref> + </li> + + <li> + <xref href="impala_incompatible_changes.xml#incompatible_changes">Incompatible changes</xref> + </li> + + <li> + <xref href="impala_install.xml#install">Installing Impala</xref> + </li> + + <li> + <xref href="impala_upgrading.xml#upgrading">Upgrading Impala</xref> + </li> + + <li> + <xref href="impala_config.xml#config">Configuring Impala</xref> + </li> + + <li> + <xref href="impala_processes.xml#processes">Starting Impala</xref> + </li> + + <li> + <xref href="impala_security.xml#security">Security for Impala</xref> + </li> + + <li> + <xref href="http://www.cloudera.com/content/cloudera-content/cloudera-docs/CDH5/latest/CDH-Version-and-Packaging-Information/CDH-Version-and-Packaging-Information.html" scope="external" format="html">CDH + Version and Packaging Information</xref> + </li> + </ul> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_alter_function.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_alter_function.xml b/docs/topics/impala_alter_function.xml new file mode 
100644 index 0000000..2ea0a26 --- /dev/null +++ b/docs/topics/impala_alter_function.xml @@ -0,0 +1,21 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept audience="Cloudera" rev="1.x" id="alter_function"> + + <title>ALTER FUNCTION Statement</title> + <titlealts audience="PDF"><navtitle>ALTER FUNCTION</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_breakpad.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_breakpad.xml b/docs/topics/impala_breakpad.xml new file mode 100644 index 0000000..026fa1d --- /dev/null +++ b/docs/topics/impala_breakpad.xml @@ -0,0 +1,255 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="breakpad" rev="2.6.0 IMPALA-2686 CDH-40238"> + + <title>Breakpad Minidumps for Impala (CDH 5.8 or higher only)</title> + <titlealts audience="PDF"><navtitle>Breakpad Minidumps</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Troubleshooting"/> + <data name="Category" value="Support"/> + <data name="Category" value="Administrators"/> + </metadata> + </prolog> + + <conbody> + + <p rev="2.6.0 IMPALA-2686 CDH-40238"> + The <xref href="https://chromium.googlesource.com/breakpad/breakpad/" scope="external" format="html">breakpad</xref> + project is an open-source framework for crash reporting. 
+ In CDH 5.8 / Impala 2.6 and higher, Impala can use <codeph>breakpad</codeph> to record stack information and + register values when any of the Impala-related daemons crash due to an error such as <codeph>SIGSEGV</codeph> + or unhandled exceptions. + The dump files are much smaller than traditional core dump files. The dump mechanism itself uses very little + memory, which improves reliability if the crash occurs while the system is low on memory. + </p> + + <note type="important"> + Because of the internal mechanisms involving Impala memory allocation and Linux + signalling for out-of-memory (OOM) errors, if an Impala-related daemon experiences a + crash due to an OOM condition, it does <i>not</i> generate a minidump for that error. + <p> + + </p> + </note> + + + <p outputclass="toc inpage" audience="PDF"/> + + </conbody> + + <concept id="breakpad_minidump_enable"> + <title>Enabling or Disabling Minidump Generation</title> + <conbody> + <p> + By default, a minidump file is generated when an Impala-related daemon crashes. + To turn off generation of the minidump files, change the + <uicontrol>minidump_path</uicontrol> configuration setting of one or more Impala-related daemons + to the empty string, and restart the corresponding services or daemons. + </p> + + <p rev="IMPALA-3677 CDH-43745"> + In CDH 5.9 / Impala 2.7 and higher, + you can send a <codeph>SIGUSR1</codeph> signal to any Impala-related daemon to write a + Breakpad minidump. For advanced troubleshooting, you can now produce a minidump + without triggering a crash. + </p> + </conbody> + </concept> + + <concept id="breakpad_minidump_location" rev="IMPALA-3581"> + <title>Specifying the Location for Minidump Files</title> + <conbody> + <p> + By default, all minidump files are written to the following location + on the host where a crash occurs: + <!-- Location stated in IMPALA-3581; overridden by different location from IMPALA-2686? 
+ <filepath><varname>log_directory</varname>/minidumps/<varname>daemon_name</varname></filepath> --> + <ul> + <li> + <p> + Clusters managed by Cloudera Manager: <filepath>/var/log/impala-minidumps/<varname>daemon_name</varname></filepath> + </p> + </li> + <li> + <p> + Clusters not managed by Cloudera Manager: + <filepath><varname>impala_log_dir</varname>/<varname>daemon_name</varname>/minidumps/<varname>daemon_name</varname></filepath> + </p> + </li> + </ul> + The minidump files for <cmdname>impalad</cmdname>, <cmdname>catalogd</cmdname>, + and <cmdname>statestored</cmdname> are each written to a separate directory. + </p> + <p> + To specify a different location, set the + <!-- Again, IMPALA-3581 says one thing and IMPALA-2686 / observation of CM interface says another. + <codeph>log_dir</codeph> --> + <uicontrol>minidump_path</uicontrol> + configuration setting of one or more Impala-related daemons, and restart the corresponding services or daemons. + </p> + <p> + If you specify a relative path for this setting, the value is interpreted relative to + the default <uicontrol>minidump_path</uicontrol> directory. + </p> + </conbody> + </concept> + + <concept id="breakpad_minidump_number"> + <title>Controlling the Number of Minidump Files</title> + <conbody> + <p> + Like any files used for logging or troubleshooting, consider limiting the number of + minidump files, or removing unneeded ones, depending on the amount of free storage + space on the hosts in the cluster. + </p> + <p> + Because the minidump files are only used for problem resolution, you can remove any such files that + are not needed to debug current issues. + </p> + <p> + To control how many minidump files Impala keeps around at any one time, + set the <uicontrol>max_minidumps</uicontrol> configuration setting + of one or more Impala-related daemons, and restart the corresponding services or daemons. + The default for this setting is 9. A zero or negative value is interpreted as + <q>unlimited</q>.
+ </p> + </conbody> + </concept> + + <concept id="breakpad_minidump_logging"> + <title>Detecting Crash Events</title> + <conbody> + <p> + You can see in the Impala log files or in the Cloudera Manager charts for Impala + when crash events occur that generate minidump files. Because each restart begins + a new log file, the <q>crashed</q> message is always at or near the bottom of the + log file. (There might be another later message if core dumps are also enabled.) + </p> + </conbody> + </concept> + + <concept id="breakpad_support_process" rev="CDH-39818"> + <title>Using the Minidump Files for Problem Resolution</title> + <conbody> + <p> + Typically, you provide minidump files to Cloudera Support as part of problem resolution, in the same way that you might provide a core dump. The <uicontrol>Send Diagnostic Data</uicontrol> choice + under the <uicontrol>Support</uicontrol> menu in Cloudera Manager guides you through the + process of selecting a time period and volume of diagnostic data, then collects the data + from all hosts and transmits the relevant information for you. + </p> + <fig id="fig_pqw_gvx_pr"> + <title>Send Diagnostic Data choice under Support menu</title> + <image href="../images/support_send_diagnostic_data.png" scalefit="yes" placement="break"/> + </fig> + <p> + Cloudera Support might provide additional instructions about collecting minidumps to better isolate a specific problem. + Because the information in the minidump files is limited to stack traces and register contents, + the possibility of including sensitive information is much lower than with core dump files. + If any sensitive information is included in the minidump, Cloudera Support preserves the confidentiality of that information.
+ </p> + </conbody> + </concept> + + <concept id="breakpad_demo"> + <title>Demonstration of Breakpad Feature</title> + <conbody> + <p> + The following example uses the command <cmdname>kill -11</cmdname> to + simulate a <codeph>SIGSEGV</codeph> crash for an <cmdname>impalad</cmdname> + process on a single DataNode, then examines the relevant log files and minidump file. + </p> + <p> + First, as root on a worker node, we kill the <cmdname>impalad</cmdname> process with a + <codeph>SIGSEGV</codeph> error. The original process ID was 23114. (Cloudera Manager + restarts the process with a new pid, as shown by the second <cmdname>ps</cmdname> command.) + </p> +<codeblock><![CDATA[ +# ps ax | grep impalad +23114 ? Sl 0:18 /opt/cloudera/parcels/<parcel_version>/lib/impala/sbin-retail/impalad --flagfile=/var/run/cloudera-scm-agent/process/114-impala-IMPALAD/impala-conf/impalad_flags +31259 pts/0 S+ 0:00 grep impalad +# +# kill -11 23114 +# +# ps ax | grep impalad +31374 ? Rl 0:04 /opt/cloudera/parcels/<parcel_version>/lib/impala/sbin-retail/impalad --flagfile=/var/run/cloudera-scm-agent/process/114-impala-IMPALAD/impala-conf/impalad_flags +31475 pts/0 S+ 0:00 grep impalad +]]> +</codeblock> + + <p> + We locate the log directory underneath <filepath>/var/log</filepath>. + There is a <codeph>.INFO</codeph>, <codeph>.WARNING</codeph>, and <codeph>.ERROR</codeph> + log file for the 23114 process ID. The minidump message is written to the + <codeph>.INFO</codeph> file and the <codeph>.ERROR</codeph> file, but not the + <codeph>.WARNING</codeph> file. In this case, a large core file was also produced. 
+ </p> +<codeblock><![CDATA[ +# cd /var/log/impalad +# ls -la | grep 23114 +-rw------- 1 impala impala 3539079168 Jun 23 15:20 core.23114 +-rw-r--r-- 1 impala impala 99057 Jun 23 15:20 hs_err_pid23114.log +-rw-r--r-- 1 impala impala 351 Jun 23 15:20 impalad.worker_node_123.impala.log.ERROR.20160623-140343.23114 +-rw-r--r-- 1 impala impala 29101 Jun 23 15:20 impalad.worker_node_123.impala.log.INFO.20160623-140343.23114 +-rw-r--r-- 1 impala impala 228 Jun 23 14:03 impalad.worker_node_123.impala.log.WARNING.20160623-140343.23114 +]]> +</codeblock> + <p> + The <codeph>.INFO</codeph> log includes the location of the minidump file, followed by + a report of a core dump. With the breakpad minidump feature enabled, now we might + disable core dumps or keep fewer of them around. + </p> +<codeblock><![CDATA[ +# cat impalad.worker_node_123.impala.log.INFO.20160623-140343.23114 +... +Wrote minidump to /var/log/impala-minidumps/impalad/0980da2d-a905-01e1-25ff883a-04ee027a.dmp +# +# A fatal error has been detected by the Java Runtime Environment: +# +# SIGSEGV (0xb) at pc=0x00000030c0e0b68a, pid=23114, tid=139869541455968 +# +# JRE version: Java(TM) SE Runtime Environment (7.0_67-b01) (build 1.7.0_67-b01) +# Java VM: Java HotSpot(TM) 64-Bit Server VM (24.65-b04 mixed mode linux-amd64 compressed oops) +# Problematic frame: +# C [libpthread.so.0+0xb68a] pthread_cond_wait+0xca +# +# Core dump written. Default location: /var/log/impalad/core or core.23114 +# +# An error report file with more information is saved as: +# /var/log/impalad/hs_err_pid23114.log +# +# If you would like to submit a bug report, please visit: +# http://bugreport.sun.com/bugreport/crash.jsp +# The crash happened outside the Java Virtual Machine in native code. +# See problematic frame for where to report the bug. +... 
+ +# cat impalad.worker_node_123.impala.log.ERROR.20160623-140343.23114 + +Log file created at: 2016/06/23 14:03:43 +Running on machine:.worker_node_123 +Log line format: [IWEF]mmdd hh:mm:ss.uuuuuu threadid file:line] msg +E0623 14:03:43.911002 23114 logging.cc:118] stderr will be logged to this file. +Wrote minidump to /var/log/impala-minidumps/impalad/0980da2d-a905-01e1-25ff883a-04ee027a.dmp +]]> +</codeblock> + <p> + The resulting minidump file is much smaller than the corresponding core file, + making it much easier to supply diagnostic information to Cloudera Support. + The transmission process for the minidump files is automated through Cloudera Manager. + </p> +<codeblock><![CDATA[ +# pwd +/var/log/impalad +# cd ../impala-minidumps/impalad +# ls +0980da2d-a905-01e1-25ff883a-04ee027a.dmp +# du -kh * +2.4M 0980da2d-a905-01e1-25ff883a-04ee027a.dmp +]]> +</codeblock> + </conbody> + </concept> + +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_cdh.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_cdh.xml b/docs/topics/impala_cdh.xml new file mode 100644 index 0000000..7ecba74 --- /dev/null +++ b/docs/topics/impala_cdh.xml @@ -0,0 +1,25 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="impala_cdh"> + + <title>How Impala Works with CDH</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Concepts"/> + <data name="Category" value="CDH"/> + <data name="Category" value="Administrators"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p conref="../shared/impala_common.xml#common/impala_overview_diagram"/> + + <p conref="../shared/impala_common.xml#common/component_list"/> + + <p conref="../shared/impala_common.xml#common/query_overview"/> + 
</conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_create_data_source.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_create_data_source.xml b/docs/topics/impala_create_data_source.xml new file mode 100644 index 0000000..a93d6bc --- /dev/null +++ b/docs/topics/impala_create_data_source.xml @@ -0,0 +1,35 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept audience="Cloudera" rev="1.4.0" id="create_data_source"> + + <title>CREATE DATA SOURCE Statement</title> + <titlealts audience="PDF"><navtitle>CREATE DATA SOURCE</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">CREATE DATA SOURCE statement</indexterm> + </p> + + <p conref="../shared/impala_common.xml#common/ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/sync_ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_data_sources.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_data_sources.xml b/docs/topics/impala_data_sources.xml new file mode 100644 index 0000000..8fb46e1 --- /dev/null +++ b/docs/topics/impala_data_sources.xml @@ -0,0 +1,22 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA 
Concept//EN" "concept.dtd"> +<concept rev="1.4.0" id="data_sources"> + + <title>Data Sources</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + <xref href="impala_create_data_source.xml#create_data_source"/> + <xref href="impala_drop_data_source.xml#drop_data_source"/> + <xref href="impala_create_table.xml#create_table"/> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_date.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_date.xml b/docs/topics/impala_date.xml new file mode 100644 index 0000000..7a63aad --- /dev/null +++ b/docs/topics/impala_date.xml @@ -0,0 +1,104 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept audience="Cloudera" id="date" rev="2.0.0"> + + <title>DATE Data Type (CDH 5.3 or higher only)</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Data Types"/> + <data name="Category" value="SQL"/> + <data name="Category" value="Data Analysts"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Dates and Times"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">DATE data type</indexterm> + A type representing the date (year, month, and day) as a single numeric value. Used to represent a broader + date range than possible with the <codeph>TIMESTAMP</codeph> type, with fewer distinct values than + <codeph>TIMESTAMP</codeph>, and in a more compact and efficient form than using a <codeph>STRING</codeph> + such as <codeph>'2014-12-31'</codeph>. 
+ </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + +<codeblock><varname>column_name</varname> DATE</codeblock> + + <p> + <b>Range:</b> January 1, -4712 BC .. December 31, 9999 AD. + </p> + + <p conref="../shared/impala_common.xml#common/hbase_ok"/> + + <p conref="../shared/impala_common.xml#common/parquet_blurb"/> + + <ul> + <li> + This type can be read from and written to Parquet files. + </li> + + <li> + There is no requirement for a particular level of Parquet. + </li> + + <li> + Parquet files generated by Impala and containing this type can be freely interchanged with other components + such as Hive and MapReduce. + </li> + </ul> + + <p conref="../shared/impala_common.xml#common/hive_blurb"/> + + <p> + TK. + </p> + + <p conref="../shared/impala_common.xml#common/conversion_blurb"/> + + <p> + TK. + </p> + + <p conref="../shared/impala_common.xml#common/partitioning_blurb"/> + + <p> + This type can be used for partition key columns. Because it has less granularity (and thus fewer distinct + values) than an equivalent <codeph>TIMESTAMP</codeph> column, and numeric columns are more efficient as + partition keys than strings, prefer to partition by a <codeph>DATE</codeph> column rather than a + <codeph>TIMESTAMP</codeph> column or a <codeph>STRING</codeph> representation of a date. + </p> + + <p conref="../shared/impala_common.xml#common/compatibility_blurb"/> + + <p> + This type is available on CDH 5.2 or higher. + </p> + + <p conref="../shared/impala_common.xml#common/internals_2_bytes"/> + + <p conref="../shared/impala_common.xml#common/added_in_20"/> + + <p conref="../shared/impala_common.xml#common/column_stats_constant"/> + + <p conref="../shared/impala_common.xml#common/restrictions_blurb"/> + + <p> + Things happen when converting <codeph>TIMESTAMP</codeph> to <codeph>DATE</codeph> or <codeph>DATE</codeph> to + <codeph>TIMESTAMP</codeph>. TK. 
+ </p> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p> + The <xref href="impala_timestamp.xml#timestamp">TIMESTAMP</xref> data type is closely related. Some functions + from <xref href="impala_datetime_functions.xml#datetime_functions"/> accept and return <codeph>DATE</codeph> + values. + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_disable_cached_reads.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_disable_cached_reads.xml b/docs/topics/impala_disable_cached_reads.xml new file mode 100644 index 0000000..bce6091 --- /dev/null +++ b/docs/topics/impala_disable_cached_reads.xml @@ -0,0 +1,36 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="disable_cached_reads" rev="1.4.0"> + + <title>DISABLE_CACHED_READS Query Option</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="HDFS"/> + <data name="Category" value="HDFS Caching"/> + <data name="Category" value="Querying"/> + <data name="Category" value="Performance"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + <indexterm audience="Cloudera">DISABLE_CACHED_READS query option</indexterm> + Prevents Impala from reading data files that are <q>pinned</q> in memory + through the HDFS caching feature. Primarily a debugging option for + cases where processing of HDFS cached data is concentrated on a single + host, leading to excessive CPU usage on that host. 
+ </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + + <p conref="../shared/impala_common.xml#common/default_false"/> + + <p conref="../shared/impala_common.xml#common/added_in_140"/> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_disable_outermost_topn.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_disable_outermost_topn.xml b/docs/topics/impala_disable_outermost_topn.xml new file mode 100644 index 0000000..24fcd2a --- /dev/null +++ b/docs/topics/impala_disable_outermost_topn.xml @@ -0,0 +1,29 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="disable_outermost_topn" rev="2.5.0"> + + <title>DISABLE_OUTERMOST_TOPN Query Option</title> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p rev="2.5.0"> + <indexterm audience="Cloudera">DISABLE_OUTERMOST_TOPN query option</indexterm> + </p> + + <p> + <b>Type:</b> + </p> + + <p> + <b>Default:</b> + </p> + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_disable_row_runtime_filtering.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_disable_row_runtime_filtering.xml b/docs/topics/impala_disable_row_runtime_filtering.xml new file mode 100644 index 0000000..4ba6e7a --- /dev/null +++ b/docs/topics/impala_disable_row_runtime_filtering.xml @@ -0,0 +1,65 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="disable_row_runtime_filtering" rev="2.5.0"> + + <title>DISABLE_ROW_RUNTIME_FILTERING Query Option 
(CDH 5.7 or higher only)</title> + <titlealts audience="PDF"><navtitle>DISABLE_ROW_RUNTIME_FILTERING</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Performance"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p rev="2.5.0"> + <indexterm audience="Cloudera">DISABLE_ROW_RUNTIME_FILTERING query option</indexterm> + The <codeph>DISABLE_ROW_RUNTIME_FILTERING</codeph> query option + reduces the scope of the runtime filtering feature. Queries still dynamically prune + partitions, but do not apply the filtering logic to individual rows within partitions. + </p> + + <p> + Only applies to queries against Parquet tables. For other file formats, Impala + only prunes at the level of partitions, not individual rows. + </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false"/> + + <p conref="../shared/impala_common.xml#common/added_in_250"/> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p> + Impala automatically evaluates whether the per-row filters are being + effective at reducing the amount of intermediate data. Therefore, + this option is typically only needed for the rare case where Impala + cannot accurately determine how effective the per-row filtering is + for a query. + </p> + + <p conref="../shared/impala_common.xml#common/runtime_filtering_option_caveat"/> + + <p> + Because this setting only improves query performance in very specific + circumstances, depending on the query characteristics and data distribution, + only use it when you determine through benchmarking that it improves + performance of specific expensive queries. + Consider setting this query option immediately before the expensive query and + unsetting it immediately afterward. 
+ </p> + + <p conref="../shared/impala_common.xml#common/related_info"/> + <p> + <xref href="impala_runtime_filtering.xml"/>, + <xref href="impala_runtime_filter_mode.xml#runtime_filter_mode"/> + <!-- , <xref href="impala_partitioning.xml#dynamic_partition_pruning"/> --> + </p> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_disable_streaming_preaggregations.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_disable_streaming_preaggregations.xml b/docs/topics/impala_disable_streaming_preaggregations.xml new file mode 100644 index 0000000..683a16d --- /dev/null +++ b/docs/topics/impala_disable_streaming_preaggregations.xml @@ -0,0 +1,45 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept id="disable_streaming_preaggregations" rev="2.5.0 IMPALA-1305"> + + <title>DISABLE_STREAMING_PREAGGREGATIONS Query Option (CDH 5.7 or higher only)</title> + <titlealts audience="PDF"><navtitle>DISABLE_STREAMING_PREAGGREGATIONS</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="Impala Query Options"/> + <data name="Category" value="Performance"/> + <data name="Category" value="Aggregate Functions"/> + <data name="Category" value="Troubleshooting"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p rev="2.5.0 IMPALA-1305"> + <indexterm audience="Cloudera">DISABLE_STREAMING_PREAGGREGATIONS query option</indexterm> + Turns off the <q>streaming preaggregation</q> optimization that is available in CDH 5.7 / Impala 2.5 + and higher. 
This optimization reduces unnecessary work performed by queries that perform aggregation + operations on columns with few or no duplicate values, for example <codeph>DISTINCT <varname>id_column</varname></codeph> + or <codeph>GROUP BY <varname>unique_column</varname></codeph>. If the optimization causes regressions in + existing queries that use aggregation functions, you can turn it off as needed by setting this query option. + </p> + + <p conref="../shared/impala_common.xml#common/type_boolean"/> + <p conref="../shared/impala_common.xml#common/default_false_0"/> + + <note conref="../shared/impala_common.xml#common/one_but_not_true"/> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + <p> + Typically, queries that would require enabling this option involve very large numbers of + aggregated values, such as a billion or more distinct keys being processed on each + worker node. + </p> + + <p conref="../shared/impala_common.xml#common/added_in_250"/> + + </conbody> +</concept> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/bb88fdc0/docs/topics/impala_drop_data_source.xml ---------------------------------------------------------------------- diff --git a/docs/topics/impala_drop_data_source.xml b/docs/topics/impala_drop_data_source.xml new file mode 100644 index 0000000..be6167c --- /dev/null +++ b/docs/topics/impala_drop_data_source.xml @@ -0,0 +1,35 @@ +<?xml version="1.0" encoding="UTF-8"?> +<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd"> +<concept audience="Cloudera" rev="1.4.0" id="drop_data_source"> + + <title>DROP DATA SOURCE Statement</title> + <titlealts audience="PDF"><navtitle>DROP DATA SOURCE</navtitle></titlealts> + <prolog> + <metadata> + <data name="Category" value="Impala"/> + <data name="Category" value="SQL"/> + <data name="Category" value="DDL"/> + <data name="Category" value="Developers"/> + <data name="Category" value="Data Analysts"/> + </metadata> + </prolog> + + <conbody> + + <p> + 
<indexterm audience="Cloudera">DROP DATA SOURCE statement</indexterm> + </p> + + <p conref="../shared/impala_common.xml#common/syntax_blurb"/> + + <p conref="../shared/impala_common.xml#common/ddl_blurb"/> + + <p conref="../shared/impala_common.xml#common/related_info"/> + + <p conref="../shared/impala_common.xml#common/usage_notes_blurb"/> + + <p conref="../shared/impala_common.xml#common/example_blurb"/> + + <p conref="../shared/impala_common.xml#common/cancel_blurb_no"/> + </conbody> +</concept>
