Add Impala 2.9 docs from master branch, with commit hash f1a3d8e14dae4948ce77e2f85e036d83f2d8b246
Change-Id: Id2209088d0120a7b1243113466b33159a4f8a25e Reviewed-on: http://gerrit.cloudera.org:8080/7397 Reviewed-by: John Russell <[email protected]> Tested-by: John Russell <[email protected]> Project: http://git-wip-us.apache.org/repos/asf/incubator-impala/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-impala/commit/ae2f8d03 Tree: http://git-wip-us.apache.org/repos/asf/incubator-impala/tree/ae2f8d03 Diff: http://git-wip-us.apache.org/repos/asf/incubator-impala/diff/ae2f8d03 Branch: refs/heads/asf-site Commit: ae2f8d035e28dcee5d334eabf91ffb7c8062ff2c Parents: 3d1c7a5 Author: Ambreen Kazi <[email protected]> Authored: Tue Jul 11 12:21:06 2017 -0700 Committer: John Russell <[email protected]> Committed: Wed Jul 12 07:16:14 2017 +0000 ---------------------------------------------------------------------- docs/build/html/index.html | 2 +- docs/build/html/topics/impala_adls.html | 645 ++++++++ docs/build/html/topics/impala_alter_table.html | 35 +- docs/build/html/topics/impala_appx_median.html | 5 + docs/build/html/topics/impala_array.html | 4 +- docs/build/html/topics/impala_auditing.html | 32 +- .../build/html/topics/impala_authorization.html | 3 +- docs/build/html/topics/impala_char.html | 2 +- docs/build/html/topics/impala_components.html | 6 + .../build/html/topics/impala_compute_stats.html | 2 +- .../topics/impala_conditional_functions.html | 54 + docs/build/html/topics/impala_create_table.html | 80 + .../html/topics/impala_datetime_functions.html | 31 +- docs/build/html/topics/impala_decimal.html | 2 +- docs/build/html/topics/impala_decimal_v2.html | 32 + .../impala_default_join_distribution_mode.html | 113 ++ docs/build/html/topics/impala_describe.html | 2 +- docs/build/html/topics/impala_double.html | 11 + docs/build/html/topics/impala_explain.html | 23 +- docs/build/html/topics/impala_explain_plan.html | 4 +- docs/build/html/topics/impala_fixed_issues.html | 1448 +++++++++--------- docs/build/html/topics/impala_float.html | 15 + 
docs/build/html/topics/impala_group_concat.html | 26 +- docs/build/html/topics/impala_hints.html | 127 +- .../topics/impala_incompatible_changes.html | 156 +- docs/build/html/topics/impala_insert.html | 34 +- docs/build/html/topics/impala_kerberos.html | 35 + docs/build/html/topics/impala_known_issues.html | 421 +++-- docs/build/html/topics/impala_kudu.html | 94 +- docs/build/html/topics/impala_literals.html | 2 +- docs/build/html/topics/impala_load_data.html | 16 + docs/build/html/topics/impala_logging.html | 12 +- docs/build/html/topics/impala_map.html | 4 +- .../html/topics/impala_math_functions.html | 8 +- docs/build/html/topics/impala_new_features.html | 161 +- docs/build/html/topics/impala_parquet.html | 55 +- .../html/topics/impala_parquet_file_size.html | 8 + .../build/html/topics/impala_query_options.html | 4 +- docs/build/html/topics/impala_refresh.html | 23 +- .../html/topics/impala_runtime_filtering.html | 4 +- docs/build/html/topics/impala_scalability.html | 173 ++- .../build/html/topics/impala_shell_options.html | 17 + docs/build/html/topics/impala_show.html | 2 +- .../html/topics/impala_string_functions.html | 241 ++- docs/build/html/topics/impala_struct.html | 4 +- docs/build/html/topics/impala_timeouts.html | 52 +- docs/build/html/topics/impala_timestamp.html | 127 +- .../html/topics/impala_troubleshooting.html | 2 +- docs/build/html/topics/impala_udf.html | 18 + docs/build/html/topics/impala_varchar.html | 2 +- docs/build/impala-2.8.pdf | Bin 0 -> 3653059 bytes docs/build/impala-2.9.pdf | Bin 0 -> 3748639 bytes docs/build/impala.pdf | Bin 3653059 -> 0 bytes impala-docs.html | 8 +- 54 files changed, 3236 insertions(+), 1151 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ae2f8d03/docs/build/html/index.html ---------------------------------------------------------------------- diff --git a/docs/build/html/index.html b/docs/build/html/index.html index 
faad535..abf79ec 100644 --- a/docs/build/html/index.html +++ b/docs/build/html/index.html @@ -1,3 +1,3 @@ <!DOCTYPE html SYSTEM "about:legacy-compat"> -<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="map"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="impala"><link rel="stylesheet" type="text/css" href="commonltr.css"><title>Apache Impala (incubating) Guide</title></head><body id="impala"><h1 class="title topictitle1">Apache Impala (incubating) Guide</h1><nav><ul class="map"><li class="topicref"><a href="topics/impala_intro.html">Introducing Apache Impala (incubating)</a></li><li class="topicref"><a href="topics/impala_concepts.html">Concepts and Architecture</a><ul><li class="topicref"><a href="topics/impala_components.html">Components</a></li><li class="topicref"><a href="topics/impala_development.html">Developing Applications</a></li><li class="topicref"><a href="topics/impala_hadoop.html">Role in the Hadoop Ecosystem</a></li></ul></li><li class="topicref"><a href="topics/impala_planning.html">Deployment Planning</a><ul><li class="topicref"><a href="topics/impala_prereqs.html#prereqs">Requirements</a></li><li class="topicref"><a href="topics/impala_cluster_sizing.html">Cluster Sizing</a></li><li class="topicref"><a href="topics/impala_schema_design.html">Designing Schemas</a></li></ul></li><li class="topicref"><a href="topics/impala_install.html#install">Installing Impala</a></li><li class="topicref"><a href="topics/impala_config.html">Managing Impala</a><ul><li class="topicref"><a href="topics/impala_config_performance.html">Post-Installation Configuration for Impala</a></li><li class="topicref"><a href="topics/impala_odbc.html">Configuring Impala to Work with ODBC</a></li><li class="topicref"><a 
href="topics/impala_jdbc.html">Configuring Impala to Work with JDBC</a></li></ul></li><li class="topicref"><a href="topics/impala_upgrading.html">Upgrading Impala</a></li><li class="top icref"><a href="topics/impala_processes.html">Starting Impala</a><ul><li class="topicref"><a href="topics/impala_config_options.html">Modifying Impala Startup Options</a></li></ul></li><li class="topicref"><a href="topics/impala_tutorial.html">Tutorials</a></li><li class="topicref"><a href="topics/impala_admin.html">Administration</a><ul><li class="topicref"><a href="topics/impala_admission.html">Admission Control and Query Queuing</a></li><li class="topicref"><a href="topics/impala_resource_management.html">Resource Management for Impala</a></li><li class="topicref"><a href="topics/impala_timeouts.html">Setting Timeouts</a></li><li class="topicref"><a href="topics/impala_proxy.html">Load-Balancing Proxy for HA</a></li><li class="topicref"><a href="topics/impala_disk_space.html">Managing Disk Space</a></li></ul></li><li class="topicref"><a href="topics/impala_security.html">Impala Security</a><ul><li class="topicref"><a href="topics/impala_security_guidelines.html">Security Guidelin es for Impala</a></li><li class="topicref"><a href="topics/impala_security_files.html">Securing Impala Data and Log Files</a></li><li class="topicref"><a href="topics/impala_security_install.html">Installation Considerations for Impala Security</a></li><li class="topicref"><a href="topics/impala_security_metastore.html">Securing the Hive Metastore Database</a></li><li class="topicref"><a href="topics/impala_security_webui.html">Securing the Impala Web User Interface</a></li><li class="topicref"><a href="topics/impala_ssl.html">Configuring TLS/SSL for Impala</a></li><li class="topicref"><a href="topics/impala_authorization.html">Enabling Sentry Authorization for Impala</a></li><li class="topicref"><a href="topics/impala_authentication.html">Impala Authentication</a><ul><li class="topicref"><a 
href="topics/impala_kerberos.html">Enabling Kerberos Authentication for Impala</a></li><li class="topicref"><a href="topics/impala_ldap.html">Enabling LDAP Authentication for Impala</a></li><li class="topicref"><a href="topics/impala_mixed_security.html">Using Multiple Authentication Methods with Impala</a></li><li class="topicref"><a href="topics/impala_delegation.html">Configuring Impala Delegation for Hue and BI Tools</a></li></ul></li><li class="topicref"><a href="topics/impala_auditing.html">Auditing</a></li><li class="topicref"><a href="topics/impala_lineage.html">Viewing Lineage Info</a></li></ul></li><li class="topicref"><a href="topics/impala_langref.html">SQL Reference</a><ul><li class="topicref"><a href="topics/impala_comments.html">Comments</a></li><li class="topicref"><a href="topics/impala_datatypes.html">Data Types</a><ul><li class="topicref"><a href="topics/impala_array.html">ARRAY Complex Type (Impala 2.3 or higher only)</a></li><li class="topicref"><a href="topics/impala_bigint.html">BIGINT</a></li><li class="topicref"><a href="topics/impala_boolean.html">BOOLEAN</a></li><li class="topicref"><a href="topics/impala_char.html">CHAR</a></li><li class="topicr ef"><a href="topics/impala_decimal.html">DECIMAL</a></li><li class="topicref"><a href="topics/impala_double.html">DOUBLE</a></li><li class="topicref"><a href="topics/impala_float.html">FLOAT</a></li><li class="topicref"><a href="topics/impala_int.html">INT</a></li><li class="topicref"><a href="topics/impala_map.html">MAP Complex Type (Impala 2.3 or higher only)</a></li><li class="topicref"><a href="topics/impala_real.html">REAL</a></li><li class="topicref"><a href="topics/impala_smallint.html">SMALLINT</a></li><li class="topicref"><a href="topics/impala_string.html">STRING</a></li><li class="topicref"><a href="topics/impala_struct.html">STRUCT Complex Type (Impala 2.3 or higher only)</a></li><li class="topicref"><a href="topics/impala_timestamp.html">TIMESTAMP</a></li><li class="topicref"><a 
href="topics/impala_tinyint.html">TINYINT</a></li><li class="topicref"><a href="topics/impala_varchar.html">VARCHAR</a></li><li class="topicref"><a href="topics/impala_complex_types.html">Comple x Types (Impala 2.3 or higher only)</a></li></ul></li><li class="topicref"><a href="topics/impala_literals.html">Literals</a></li><li class="topicref"><a href="topics/impala_operators.html">SQL Operators</a></li><li class="topicref"><a href="topics/impala_schema_objects.html">Schema Objects and Object Names</a><ul><li class="topicref"><a href="topics/impala_aliases.html">Aliases</a></li><li class="topicref"><a href="topics/impala_databases.html">Databases</a></li><li class="topicref"><a href="topics/impala_functions_overview.html">Functions</a></li><li class="topicref"><a href="topics/impala_identifiers.html">Identifiers</a></li><li class="topicref"><a href="topics/impala_tables.html">Tables</a></li><li class="topicref"><a href="topics/impala_views.html">Views</a></li></ul></li><li class="topicref"><a href="topics/impala_langref_sql.html">SQL Statements</a><ul><li class="topicref"><a href="topics/impala_ddl.html">DDL Statements</a></li><li class="topicref"><a href="topics/impala_dml .html">DML Statements</a></li><li class="topicref"><a href="topics/impala_alter_table.html">ALTER TABLE</a></li><li class="topicref"><a href="topics/impala_alter_view.html">ALTER VIEW</a></li><li class="topicref"><a href="topics/impala_compute_stats.html">COMPUTE STATS</a></li><li class="topicref"><a href="topics/impala_create_database.html">CREATE DATABASE</a></li><li class="topicref"><a href="topics/impala_create_function.html">CREATE FUNCTION</a></li><li class="topicref"><a href="topics/impala_create_role.html">CREATE ROLE</a></li><li class="topicref"><a href="topics/impala_create_table.html">CREATE TABLE</a></li><li class="topicref"><a href="topics/impala_create_view.html">CREATE VIEW</a></li><li class="topicref"><a href="topics/impala_delete.html">DELETE</a></li><li class="topicref"><a 
href="topics/impala_describe.html">DESCRIBE</a></li><li class="topicref"><a href="topics/impala_drop_database.html">DROP DATABASE</a></li><li class="topicref"><a href="topics/impala_drop_function .html">DROP FUNCTION</a></li><li class="topicref"><a href="topics/impala_drop_role.html">DROP ROLE</a></li><li class="topicref"><a href="topics/impala_drop_stats.html">DROP STATS</a></li><li class="topicref"><a href="topics/impala_drop_table.html">DROP TABLE</a></li><li class="topicref"><a href="topics/impala_drop_view.html">DROP VIEW</a></li><li class="topicref"><a href="topics/impala_explain.html">EXPLAIN</a></li><li class="topicref"><a href="topics/impala_grant.html">GRANT</a></li><li class="topicref"><a href="topics/impala_insert.html">INSERT</a></li><li class="topicref"><a href="topics/impala_invalidate_metadata.html">INVALIDATE METADATA</a></li><li class="topicref"><a href="topics/impala_load_data.html">LOAD DATA</a></li><li class="topicref"><a href="topics/impala_refresh.html">REFRESH</a></li><li class="topicref"><a href="topics/impala_revoke.html">REVOKE</a></li><li class="topicref"><a href="topics/impala_select.html">SELECT</a><ul><li class="topicref"><a href="topics/impala _joins.html">Joins</a></li><li class="topicref"><a href="topics/impala_order_by.html">ORDER BY Clause</a></li><li class="topicref"><a href="topics/impala_group_by.html">GROUP BY Clause</a></li><li class="topicref"><a href="topics/impala_having.html">HAVING Clause</a></li><li class="topicref"><a href="topics/impala_limit.html">LIMIT Clause</a></li><li class="topicref"><a href="topics/impala_offset.html">OFFSET Clause</a></li><li class="topicref"><a href="topics/impala_union.html">UNION Clause</a></li><li class="topicref"><a href="topics/impala_subqueries.html">Subqueries</a></li><li class="topicref"><a href="topics/impala_with.html">WITH Clause</a></li><li class="topicref"><a href="topics/impala_distinct.html">DISTINCT Operator</a></li><li class="topicref"><a 
href="topics/impala_hints.html">Hints</a></li></ul></li><li class="topicref"><a href="topics/impala_set.html">SET</a><ul><li class="topicref"><a href="topics/impala_query_options.html">Query Options for the SET Statement</a><ul> <li class="topicref"><a href="topics/impala_abort_on_default_limit_exceeded.html">ABORT_ON_DEFAULT_LIMIT_EXCEEDED</a></li><li class="topicref"><a href="topics/impala_abort_on_error.html">ABORT_ON_ERROR</a></li><li class="topicref"><a href="topics/impala_allow_unsupported_formats.html">ALLOW_UNSUPPORTED_FORMATS</a></li><li class="topicref"><a href="topics/impala_appx_count_distinct.html">APPX_COUNT_DISTINCT</a></li><li class="topicref"><a href="topics/impala_batch_size.html">BATCH_SIZE</a></li><li class="topicref"><a href="topics/impala_compression_codec.html">COMPRESSION_CODEC</a></li><li class="topicref"><a href="topics/impala_debug_action.html">DEBUG_ACTION</a></li><li class="topicref"><a href="topics/impala_default_order_by_limit.html">DEFAULT_ORDER_BY_LIMIT</a></li><li class="topicref"><a href="topics/impala_disable_codegen.html">DISABLE_CODEGEN</a></li><li class="topicref"><a href="topics/impala_disable_row_runtime_filtering.html">DISABLE_ROW_RUNTIME_FILTERING</a></li><li class ="topicref"><a href="topics/impala_disable_streaming_preaggregations.html">DISABLE_STREAMING_PREAGGREGATIONS</a></li><li class="topicref"><a href="topics/impala_disable_unsafe_spills.html">DISABLE_UNSAFE_SPILLS</a></li><li class="topicref"><a href="topics/impala_exec_single_node_rows_threshold.html">EXEC_SINGLE_NODE_ROWS_THRESHOLD</a></li><li class="topicref"><a href="topics/impala_explain_level.html">EXPLAIN_LEVEL</a></li><li class="topicref"><a href="topics/impala_hbase_cache_blocks.html">HBASE_CACHE_BLOCKS</a></li><li class="topicref"><a href="topics/impala_hbase_caching.html">HBASE_CACHING</a></li><li class="topicref"><a href="topics/impala_live_progress.html">LIVE_PROGRESS</a></li><li class="topicref"><a 
href="topics/impala_live_summary.html">LIVE_SUMMARY</a></li><li class="topicref"><a href="topics/impala_max_errors.html">MAX_ERRORS</a></li><li class="topicref"><a href="topics/impala_max_io_buffers.html">MAX_IO_BUFFERS</a></li><li class="topicref"><a href="topics/impala_max_sc an_range_length.html">MAX_SCAN_RANGE_LENGTH</a></li><li class="topicref"><a href="topics/impala_max_num_runtime_filters.html">MAX_NUM_RUNTIME_FILTERS</a></li><li class="topicref"><a href="topics/impala_mem_limit.html">MEM_LIMIT</a></li><li class="topicref"><a href="topics/impala_mt_dop.html">MT_DOP</a></li><li class="topicref"><a href="topics/impala_num_nodes.html">NUM_NODES</a></li><li class="topicref"><a href="topics/impala_num_scanner_threads.html">NUM_SCANNER_THREADS</a></li><li class="topicref"><a href="topics/impala_optimize_partition_key_scans.html">OPTIMIZE_PARTITION_KEY_SCANS</a></li><li class="topicref"><a href="topics/impala_parquet_compression_codec.html">PARQUET_COMPRESSION_CODEC</a></li><li class="topicref"><a href="topics/impala_parquet_annotate_strings_utf8.html">PARQUET_ANNOTATE_STRINGS_UTF8</a></li><li class="topicref"><a href="topics/impala_parquet_fallback_schema_resolution.html">PARQUET_FALLBACK_SCHEMA_RESOLUTION</a></li><li class="topicref"><a href="topics/impa la_parquet_file_size.html">PARQUET_FILE_SIZE</a></li><li class="topicref"><a href="topics/impala_prefetch_mode.html">PREFETCH_MODE</a></li><li class="topicref"><a href="topics/impala_query_timeout_s.html">QUERY_TIMEOUT_S</a></li><li class="topicref"><a href="topics/impala_request_pool.html">REQUEST_POOL</a></li><li class="topicref"><a href="topics/impala_replica_preference.html">REPLICA_PREFERENCE</a></li><li class="topicref"><a href="topics/impala_reservation_request_timeout.html">RESERVATION_REQUEST_TIMEOUT</a></li><li class="topicref"><a href="topics/impala_runtime_bloom_filter_size.html">RUNTIME_BLOOM_FILTER_SIZE</a></li><li class="topicref"><a 
href="topics/impala_runtime_filter_max_size.html">RUNTIME_FILTER_MAX_SIZE</a></li><li class="topicref"><a href="topics/impala_runtime_filter_min_size.html">RUNTIME_FILTER_MIN_SIZE</a></li><li class="topicref"><a href="topics/impala_runtime_filter_mode.html">RUNTIME_FILTER_MODE</a></li><li class="topicref"><a href="topics/impala_runtime_fi lter_wait_time_ms.html">RUNTIME_FILTER_WAIT_TIME_MS</a></li><li class="topicref"><a href="topics/impala_s3_skip_insert_staging.html">S3_SKIP_INSERT_STAGING</a></li><li class="topicref"><a href="topics/impala_scan_node_codegen_threshold.html">SCAN_NODE_CODEGEN_THRESHOLD</a></li><li class="topicref"><a href="topics/impala_scratch_limit.html">SCRATCH_LIMIT</a></li><li class="topicref"><a href="topics/impala_schedule_random_replica.html">SCHEDULE_RANDOM_REPLICA</a></li><li class="topicref"><a href="topics/impala_support_start_over.html">SUPPORT_START_OVER</a></li><li class="topicref"><a href="topics/impala_sync_ddl.html">SYNC_DDL</a></li><li class="topicref"><a href="topics/impala_v_cpu_cores.html">V_CPU_CORES</a></li></ul></li></ul></li><li class="topicref"><a href="topics/impala_show.html">SHOW</a></li><li class="topicref"><a href="topics/impala_truncate_table.html">TRUNCATE TABLE</a></li><li class="topicref"><a href="topics/impala_update.html">UPDATE</a></li><li class="topicref"><a h ref="topics/impala_upsert.html">UPSERT</a></li><li class="topicref"><a href="topics/impala_use.html">USE</a></li></ul></li><li class="topicref"><a href="topics/impala_functions.html">Built-In Functions</a><ul><li class="topicref"><a href="topics/impala_math_functions.html">Mathematical Functions</a></li><li class="topicref"><a href="topics/impala_bit_functions.html">Bit Functions</a></li><li class="topicref"><a href="topics/impala_conversion_functions.html">Type Conversion Functions</a></li><li class="topicref"><a href="topics/impala_datetime_functions.html">Date and Time Functions</a></li><li class="topicref"><a 
href="topics/impala_conditional_functions.html">Conditional Functions</a></li><li class="topicref"><a href="topics/impala_string_functions.html">String Functions</a></li><li class="topicref"><a href="topics/impala_misc_functions.html">Miscellaneous Functions</a></li><li class="topicref"><a href="topics/impala_aggregate_functions.html">Aggregate Functions</a><ul><li class="t opicref"><a href="topics/impala_appx_median.html">APPX_MEDIAN</a></li><li class="topicref"><a href="topics/impala_avg.html">AVG</a></li><li class="topicref"><a href="topics/impala_count.html">COUNT</a></li><li class="topicref"><a href="topics/impala_group_concat.html">GROUP_CONCAT</a></li><li class="topicref"><a href="topics/impala_max.html">MAX</a></li><li class="topicref"><a href="topics/impala_min.html">MIN</a></li><li class="topicref"><a href="topics/impala_ndv.html">NDV</a></li><li class="topicref"><a href="topics/impala_stddev.html">STDDEV, STDDEV_SAMP, STDDEV_POP</a></li><li class="topicref"><a href="topics/impala_sum.html">SUM</a></li><li class="topicref"><a href="topics/impala_variance.html">VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, VAR_POP</a></li></ul></li><li class="topicref"><a href="topics/impala_analytic_functions.html">Analytic Functions</a></li><li class="topicref"><a href="topics/impala_udf.html">Impala User-Defined Functions (UDFs)</a></li></ul></li><li cla ss="topicref"><a href="topics/impala_langref_unsupported.html">SQL Differences Between Impala and Hive</a></li><li class="topicref"><a href="topics/impala_porting.html">Porting SQL</a></li></ul></li><li class="topicref"><a href="topics/impala_impala_shell.html">The Impala Shell</a><ul><li class="topicref"><a href="topics/impala_shell_options.html">Configuration Options</a></li><li class="topicref"><a href="topics/impala_connecting.html">Connecting to impalad</a></li><li class="topicref"><a href="topics/impala_shell_running_commands.html">Running Commands and SQL Statements</a></li><li class="topicref"><a 
href="topics/impala_shell_commands.html">Command Reference</a></li></ul></li><li class="topicref"><a href="topics/impala_performance.html">Performance Tuning</a><ul><li class="topicref"><a href="topics/impala_perf_cookbook.html">Performance Best Practices</a></li><li class="topicref"><a href="topics/impala_perf_joins.html">Join Performance</a></li><li class="topicref"><a href="topic s/impala_perf_stats.html">Table and Column Statistics</a></li><li class="topicref"><a href="topics/impala_perf_benchmarking.html">Benchmarking</a></li><li class="topicref"><a href="topics/impala_perf_resources.html">Controlling Resource Usage</a></li><li class="topicref"><a href="topics/impala_runtime_filtering.html">Runtime Filtering</a></li><li class="topicref"><a href="topics/impala_perf_hdfs_caching.html">HDFS Caching</a></li><li class="topicref"><a href="topics/impala_perf_testing.html">Testing Impala Performance</a></li><li class="topicref"><a href="topics/impala_explain_plan.html">EXPLAIN Plans and Query Profiles</a></li><li class="topicref"><a href="topics/impala_perf_skew.html">HDFS Block Skew</a></li></ul></li><li class="topicref"><a href="topics/impala_scalability.html">Scalability Considerations</a></li><li class="topicref"><a href="topics/impala_partitioning.html">Partitioning</a></li><li class="topicref"><a href="topics/impala_file_formats.html">File Formats</a><ul><li class="topicref"><a href="topics/impala_txtfile.html">Text Data Files</a></li><li class="topicref"><a href="topics/impala_parquet.html">Parquet Data Files</a></li><li class="topicref"><a href="topics/impala_avro.html">Avro Data Files</a></li><li class="topicref"><a href="topics/impala_rcfile.html">RCFile Data Files</a></li><li class="topicref"><a href="topics/impala_seqfile.html">SequenceFile Data Files</a></li></ul></li><li class="topicref"><a href="topics/impala_kudu.html">Using Impala to Query Kudu Tables</a></li><li class="topicref"><a href="topics/impala_hbase.html">HBase Tables</a></li><li 
class="topicref"><a href="topics/impala_s3.html">S3 Tables</a></li><li class="topicref"><a href="topics/impala_isilon.html">Isilon Storage</a></li><li class="topicref"><a href="topics/impala_logging.html">Logging</a></li><li class="topicref"><a href="topics/impala_troubleshooting.html">Troubleshooting Impala</a><ul><li class="topicref"><a href="topics/impala_webui.html">Web User Interface</ a></li><li class="topicref"><a href="topics/impala_breakpad.html">Breakpad Minidumps</a></li></ul></li><li class="topicref"><a href="topics/impala_ports.html">Ports Used by Impala</a></li><li class="topicref"><a href="topics/impala_reserved_words.html">Impala Reserved Words</a></li><li class="topicref"><a href="topics/impala_faq.html">Impala Frequently Asked Questions</a></li><li class="topicref"><a href="topics/impala_release_notes.html">Impala Release Notes</a><ul><li class="topicref"><a href="topics/impala_relnotes.html">Impala Release Notes</a></li><li class="topicref"><a href="topics/impala_new_features.html">New Features in Apache Impala (incubating)</a></li><li class="topicref"><a href="topics/impala_incompatible_changes.html">Incompatible Changes and Limitations in Apache Impala (incubating)</a></li><li class="topicref"><a href="topics/impala_known_issues.html">Known Issues and Workarounds in Impala</a></li><li class="topicref"><a href="topics/impala_fixed_issues.html">Fixed Issues in Apache Impala (incubating)</a></li></ul></li></ul></nav></body></html> \ No newline at end of file +<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="map"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="impala"><link rel="stylesheet" type="text/css" href="commonltr.css"><title>Apache Impala (incubating) Guide</title></head><body id="impala"><h1 class="title 
topictitle1">Apache Impala (incubating) Guide</h1><nav><ul class="map"><li class="topicref"><a href="topics/impala_intro.html">Introducing Apache Impala (incubating)</a></li><li class="topicref"><a href="topics/impala_concepts.html">Concepts and Architecture</a><ul><li class="topicref"><a href="topics/impala_components.html">Components</a></li><li class="topicref"><a href="topics/impala_development.html">Developing Applications</a></li><li class="topicref"><a href="topics/impala_hadoop.html">Role in the Hadoop Ecosystem</a></li></ul></li><li class="topicref"><a href="topics/impala_planning.html">Deployment Planning</a><ul><li class="topicref"><a href="topics/impala_prereqs.html#prereqs">Requirements</a></li><li class="topicref"><a href="topics/impala_cluster_sizing.html">Cluster Sizing</a></li><li class="topicref"><a href="topics/impala_schema_design.html">Designing Schemas</a></li></ul></li><li class="topicref"><a href="topics/impala_install.html#install">Installing Impala</a></li><li class="topicref"><a href="topics/impala_config.html">Managing Impala</a><ul><li class="topicref"><a href="topics/impala_config_performance.html">Post-Installation Configuration for Impala</a></li><li class="topicref"><a href="topics/impala_odbc.html">Configuring Impala to Work with ODBC</a></li><li class="topicref"><a href="topics/impala_jdbc.html">Configuring Impala to Work with JDBC</a></li></ul></li><li class="topicref"><a href="topics/impala_upgrading.html">Upgrading Impala</a></li><li class="top icref"><a href="topics/impala_processes.html">Starting Impala</a><ul><li class="topicref"><a href="topics/impala_config_options.html">Modifying Impala Startup Options</a></li></ul></li><li class="topicref"><a href="topics/impala_tutorial.html">Tutorials</a></li><li class="topicref"><a href="topics/impala_admin.html">Administration</a><ul><li class="topicref"><a href="topics/impala_admission.html">Admission Control and Query Queuing</a></li><li class="topicref"><a 
href="topics/impala_resource_management.html">Resource Management for Impala</a></li><li class="topicref"><a href="topics/impala_timeouts.html">Setting Timeouts</a></li><li class="topicref"><a href="topics/impala_proxy.html">Load-Balancing Proxy for HA</a></li><li class="topicref"><a href="topics/impala_disk_space.html">Managing Disk Space</a></li></ul></li><li class="topicref"><a href="topics/impala_security.html">Impala Security</a><ul><li class="topicref"><a href="topics/impala_security_guidelines.html">Security Guidelin es for Impala</a></li><li class="topicref"><a href="topics/impala_security_files.html">Securing Impala Data and Log Files</a></li><li class="topicref"><a href="topics/impala_security_install.html">Installation Considerations for Impala Security</a></li><li class="topicref"><a href="topics/impala_security_metastore.html">Securing the Hive Metastore Database</a></li><li class="topicref"><a href="topics/impala_security_webui.html">Securing the Impala Web User Interface</a></li><li class="topicref"><a href="topics/impala_ssl.html">Configuring TLS/SSL for Impala</a></li><li class="topicref"><a href="topics/impala_authorization.html">Enabling Sentry Authorization for Impala</a></li><li class="topicref"><a href="topics/impala_authentication.html">Impala Authentication</a><ul><li class="topicref"><a href="topics/impala_kerberos.html">Enabling Kerberos Authentication for Impala</a></li><li class="topicref"><a href="topics/impala_ldap.html">Enabling LDAP Authentication for Impala</a></li><li class="topicref"><a href="topics/impala_mixed_security.html">Using Multiple Authentication Methods with Impala</a></li><li class="topicref"><a href="topics/impala_delegation.html">Configuring Impala Delegation for Hue and BI Tools</a></li></ul></li><li class="topicref"><a href="topics/impala_auditing.html">Auditing</a></li><li class="topicref"><a href="topics/impala_lineage.html">Viewing Lineage Info</a></li></ul></li><li class="topicref"><a 
href="topics/impala_langref.html">SQL Reference</a><ul><li class="topicref"><a href="topics/impala_comments.html">Comments</a></li><li class="topicref"><a href="topics/impala_datatypes.html">Data Types</a><ul><li class="topicref"><a href="topics/impala_array.html">ARRAY Complex Type (Impala 2.3 or higher only)</a></li><li class="topicref"><a href="topics/impala_bigint.html">BIGINT</a></li><li class="topicref"><a href="topics/impala_boolean.html">BOOLEAN</a></li><li class="topicref"><a href="topics/impala_char.html">CHAR</a></li><li class="topicr ef"><a href="topics/impala_decimal.html">DECIMAL</a></li><li class="topicref"><a href="topics/impala_double.html">DOUBLE</a></li><li class="topicref"><a href="topics/impala_float.html">FLOAT</a></li><li class="topicref"><a href="topics/impala_int.html">INT</a></li><li class="topicref"><a href="topics/impala_map.html">MAP Complex Type (Impala 2.3 or higher only)</a></li><li class="topicref"><a href="topics/impala_real.html">REAL</a></li><li class="topicref"><a href="topics/impala_smallint.html">SMALLINT</a></li><li class="topicref"><a href="topics/impala_string.html">STRING</a></li><li class="topicref"><a href="topics/impala_struct.html">STRUCT Complex Type (Impala 2.3 or higher only)</a></li><li class="topicref"><a href="topics/impala_timestamp.html">TIMESTAMP</a></li><li class="topicref"><a href="topics/impala_tinyint.html">TINYINT</a></li><li class="topicref"><a href="topics/impala_varchar.html">VARCHAR</a></li><li class="topicref"><a href="topics/impala_complex_types.html">Comple x Types (Impala 2.3 or higher only)</a></li></ul></li><li class="topicref"><a href="topics/impala_literals.html">Literals</a></li><li class="topicref"><a href="topics/impala_operators.html">SQL Operators</a></li><li class="topicref"><a href="topics/impala_schema_objects.html">Schema Objects and Object Names</a><ul><li class="topicref"><a href="topics/impala_aliases.html">Aliases</a></li><li class="topicref"><a 
href="topics/impala_databases.html">Databases</a></li><li class="topicref"><a href="topics/impala_functions_overview.html">Functions</a></li><li class="topicref"><a href="topics/impala_identifiers.html">Identifiers</a></li><li class="topicref"><a href="topics/impala_tables.html">Tables</a></li><li class="topicref"><a href="topics/impala_views.html">Views</a></li></ul></li><li class="topicref"><a href="topics/impala_langref_sql.html">SQL Statements</a><ul><li class="topicref"><a href="topics/impala_ddl.html">DDL Statements</a></li><li class="topicref"><a href="topics/impala_dml .html">DML Statements</a></li><li class="topicref"><a href="topics/impala_alter_table.html">ALTER TABLE</a></li><li class="topicref"><a href="topics/impala_alter_view.html">ALTER VIEW</a></li><li class="topicref"><a href="topics/impala_compute_stats.html">COMPUTE STATS</a></li><li class="topicref"><a href="topics/impala_create_database.html">CREATE DATABASE</a></li><li class="topicref"><a href="topics/impala_create_function.html">CREATE FUNCTION</a></li><li class="topicref"><a href="topics/impala_create_role.html">CREATE ROLE</a></li><li class="topicref"><a href="topics/impala_create_table.html">CREATE TABLE</a></li><li class="topicref"><a href="topics/impala_create_view.html">CREATE VIEW</a></li><li class="topicref"><a href="topics/impala_delete.html">DELETE</a></li><li class="topicref"><a href="topics/impala_describe.html">DESCRIBE</a></li><li class="topicref"><a href="topics/impala_drop_database.html">DROP DATABASE</a></li><li class="topicref"><a href="topics/impala_drop_function .html">DROP FUNCTION</a></li><li class="topicref"><a href="topics/impala_drop_role.html">DROP ROLE</a></li><li class="topicref"><a href="topics/impala_drop_stats.html">DROP STATS</a></li><li class="topicref"><a href="topics/impala_drop_table.html">DROP TABLE</a></li><li class="topicref"><a href="topics/impala_drop_view.html">DROP VIEW</a></li><li class="topicref"><a 
href="topics/impala_explain.html">EXPLAIN</a></li><li class="topicref"><a href="topics/impala_grant.html">GRANT</a></li><li class="topicref"><a href="topics/impala_insert.html">INSERT</a></li><li class="topicref"><a href="topics/impala_invalidate_metadata.html">INVALIDATE METADATA</a></li><li class="topicref"><a href="topics/impala_load_data.html">LOAD DATA</a></li><li class="topicref"><a href="topics/impala_refresh.html">REFRESH</a></li><li class="topicref"><a href="topics/impala_revoke.html">REVOKE</a></li><li class="topicref"><a href="topics/impala_select.html">SELECT</a><ul><li class="topicref"><a href="topics/impala_joins.html">Joins</a></li><li class="topicref"><a href="topics/impala_order_by.html">ORDER BY Clause</a></li><li class="topicref"><a href="topics/impala_group_by.html">GROUP BY Clause</a></li><li class="topicref"><a href="topics/impala_having.html">HAVING Clause</a></li><li class="topicref"><a href="topics/impala_limit.html">LIMIT Clause</a></li><li class="topicref"><a href="topics/impala_offset.html">OFFSET Clause</a></li><li class="topicref"><a href="topics/impala_union.html">UNION Clause</a></li><li class="topicref"><a href="topics/impala_subqueries.html">Subqueries</a></li><li class="topicref"><a href="topics/impala_with.html">WITH Clause</a></li><li class="topicref"><a href="topics/impala_distinct.html">DISTINCT Operator</a></li><li class="topicref"><a href="topics/impala_hints.html">Hints</a></li></ul></li><li class="topicref"><a href="topics/impala_set.html">SET</a><ul><li class="topicref"><a href="topics/impala_query_options.html">Query Options for the SET Statement</a><ul><li class="topicref"><a href="topics/impala_abort_on_default_limit_exceeded.html">ABORT_ON_DEFAULT_LIMIT_EXCEEDED</a></li><li class="topicref"><a href="topics/impala_abort_on_error.html">ABORT_ON_ERROR</a></li><li class="topicref"><a href="topics/impala_allow_unsupported_formats.html">ALLOW_UNSUPPORTED_FORMATS</a></li><li class="topicref"><a 
href="topics/impala_appx_count_distinct.html">APPX_COUNT_DISTINCT</a></li><li class="topicref"><a href="topics/impala_batch_size.html">BATCH_SIZE</a></li><li class="topicref"><a href="topics/impala_compression_codec.html">COMPRESSION_CODEC</a></li><li class="topicref"><a href="topics/impala_debug_action.html">DEBUG_ACTION</a></li><li class="topicref"><a href="topics/impala_default_join_distribution_mode.html">DEFAULT_JOIN_DISTRIBUTION_MODE</a></li><li class="topicref"><a href="topics/impala_default_order_by_limit.html">DEFAULT_ORDER_BY_LIMIT</a></li><li class="topicref"><a href="topics/impala_disable_codegen.html">DISABLE_CODEGEN</a></li><li class="topicref"><a href="topics/impala_decimal_v2.html">DECIMAL_V2</a></li><li class="topicref"><a href="topics/impala_disable_row_runtime_filtering.html">DISABLE_ROW_RUNTIME_FILTERING</a></li><li class="topicref"><a href="topics/impala_disable_streaming_preaggregations.html">DISABLE_STREAMING_PREAGGREGATIONS</a></li><li class="topicref"><a href="topics/impala_disable_unsafe_spills.html">DISABLE_UNSAFE_SPILLS</a></li><li class="topicref"><a href="topics/impala_exec_single_node_rows_threshold.html">EXEC_SINGLE_NODE_ROWS_THRESHOLD</a></li><li class="topicref"><a href="topics/impala_explain_level.html">EXPLAIN_LEVEL</a></li><li class="topicref"><a href="topics/impala_hbase_cache_blocks.html">HBASE_CACHE_BLOCKS</a></li><li class="topicref"><a href="topics/impala_hbase_caching.html">HBASE_CACHING</a></li><li class="topicref"><a href="topics/impala_live_progress.html">LIVE_PROGRESS</a></li><li class="topicref"><a href="topics/impala_live_summary.html">LIVE_SUMMARY</a></li><li class="topicref"><a href="topics/impala_max_errors.html">MAX_ERRORS</a></li><li class="topicref"><a href="topics/impala_max_io_buffers.html">MAX_IO_BUFFERS</a></li><li class="topicref"><a href="topics/impala_max_scan_range_length.html">MAX_SCAN_RANGE_LENGTH</a></li><li class="topicref"><a 
href="topics/impala_max_num_runtime_filters.html">MAX_NUM_RUNTIME_FILTERS</a></li><li class="topicref"><a href="topics/impala_mem_limit.html">MEM_LIMIT</a></li><li class="topicref"><a href="topics/impala_mt_dop.html">MT_DOP</a></li><li class="topicref"><a href="topics/impala_num_nodes.html">NUM_NODES</a></li><li class="topicref"><a href="topics/impala_num_scanner_threads.html">NUM_SCANNER_THREADS</a></li><li class="topicref"><a href="topics/impala_optimize_partition_key_scans.html">OPTIMIZE_PARTITION_KEY_SCANS</a></li><li class="topicref"><a href="topics/impala_parquet_compression_codec.html">PARQUET_COMPRESSION_CODEC</a></li><li class="topicref"><a href="topics/impala_parquet_annotate_strings_utf8.html">PARQUET_ANNOTATE_STRINGS_UTF8</a></li><li class="topicref"><a href="topics/impala_parquet_fallback_schema_resolution.html">PARQUET_FALLBACK_SCHEMA_RESOLUTION</a></li><li class="topicref"><a href="topics/impala_parquet_file_size.html">PARQUET_FILE_SIZE</a></li><li class="topicref"><a href="topics/impala_prefetch_mode.html">PREFETCH_MODE</a></li><li class="topicref"><a href="topics/impala_query_timeout_s.html">QUERY_TIMEOUT_S</a></li><li class="topicref"><a href="topics/impala_request_pool.html">REQUEST_POOL</a></li><li class="topicref"><a href="topics/impala_replica_preference.html">REPLICA_PREFERENCE</a></li><li class="topicref"><a href="topics/impala_reservation_request_timeout.html">RESERVATION_REQUEST_TIMEOUT</a></li><li class="topicref"><a href="topics/impala_runtime_bloom_filter_size.html">RUNTIME_BLOOM_FILTER_SIZE</a></li><li class="topicref"><a href="topics/impala_runtime_filter_max_size.html">RUNTIME_FILTER_MAX_SIZE</a></li><li class="topicref"><a href="topics/impala_runtime_filter_min_size.html">RUNTIME_FILTER_MIN_SIZE</a></li><li class="topicref"><a href="topics/impala_runtime_filter_mode.html">RUNTIME_FILTER_MODE</a></li><li class="topicref"><a href="topics/impala_runtime_filter_wait_time_ms.html">RUNTIME_FILTER_WAIT_TIME_MS</a></li><li 
class="topicref"><a href="topics/impala_s3_skip_insert_staging.html">S3_SKIP_INSERT_STAGING</a></li><li class="topicref"><a href="topics/impala_scan_node_codegen_threshold.html">SCAN_NODE_CODEGEN_THRESHOLD</a></li><li class="topicref"><a href="topics/impala_scratch_limit.html">SCRATCH_LIMIT</a></li><li class="topicref"><a href="topics/impala_schedule_random_replica.html">SCHEDULE_RANDOM_REPLICA</a></li><li class="topicref"><a href="topics/impala_support_start_over.html">SUPPORT_START_OVER</a></li><li class="topicref"><a href="topics/impala_sync_ddl.html">SYNC_DDL</a></li><li class="topicref"><a href="topics/impala_v_cpu_cores.html">V_CPU_CORES</a></li></ul></li></ul></li><li class="topicref"><a href="topics/impala_show.html">SHOW</a></li><li class="topicref"><a href="topics/impala_truncate_table.html">TRUNCATE TABLE</a></li><li class="topicref"><a href="topics/impala_update.html">UPDATE</a></li><li class="topicref"><a href="topics/impala_upsert.html">UPSERT</a></li><li class="topicref"><a href="topics/impala_use.html">USE</a></li></ul></li><li class="topicref"><a href="topics/impala_functions.html">Built-In Functions</a><ul><li class="topicref"><a href="topics/impala_math_functions.html">Mathematical Functions</a></li><li class="topicref"><a href="topics/impala_bit_functions.html">Bit Functions</a></li><li class="topicref"><a href="topics/impala_conversion_functions.html">Type Conversion Functions</a></li><li class="topicref"><a href="topics/impala_datetime_functions.html">Date and Time Functions</a></li><li class="topicref"><a href="topics/impala_conditional_functions.html">Conditional Functions</a></li><li class="topicref"><a href="topics/impala_string_functions.html">String Functions</a></li><li class="topicref"><a href="topics/impala_misc_functions.html">Miscellaneous Functions</a></li><li class="topicref"><a href="topics/impala_aggregate_functions.html">Aggregate Functions</a><ul><li class="topicref"><a 
href="topics/impala_appx_median.html">APPX_MEDIAN</a></li><li class="topicref"><a href="topics/impala_avg.html">AVG</a></li><li class="topicref"><a href="topics/impala_count.html">COUNT</a></li><li class="topicref"><a href="topics/impala_group_concat.html">GROUP_CONCAT</a></li><li class="topicref"><a href="topics/impala_max.html">MAX</a></li><li class="topicref"><a href="topics/impala_min.html">MIN</a></li><li class="topicref"><a href="topics/impala_ndv.html">NDV</a></li><li class="topicref"><a href="topics/impala_stddev.html">STDDEV, STDDEV_SAMP, STDDEV_POP</a></li><li class="topicref"><a href="topics/impala_sum.html">SUM</a></li><li class="topicref"><a href="topics/impala_variance.html">VARIANCE, VARIANCE_SAMP, VARIANCE_POP, VAR_SAMP, VAR_POP</a></li></ul></li><li class="topicref"><a href="topics/impala_analytic_functions.html">Analytic Functions</a></li><li class="topicref"><a href="topics/impala_udf.html">Impala User-Defined Functions (UDFs)</a></li></ul></li><li class="topicref"><a href="topics/impala_langref_unsupported.html">SQL Differences Between Impala and Hive</a></li><li class="topicref"><a href="topics/impala_porting.html">Porting SQL</a></li></ul></li><li class="topicref"><a href="topics/impala_impala_shell.html">The Impala Shell</a><ul><li class="topicref"><a href="topics/impala_shell_options.html">Configuration Options</a></li><li class="topicref"><a href="topics/impala_connecting.html">Connecting to impalad</a></li><li class="topicref"><a href="topics/impala_shell_running_commands.html">Running Commands and SQL Statements</a></li><li class="topicref"><a href="topics/impala_shell_commands.html">Command Reference</a></li></ul></li><li class="topicref"><a href="topics/impala_performance.html">Performance Tuning</a><ul><li class="topicref"><a href="topics/impala_perf_cookbook.html">Performance Best Practices</a></li><li class="topicref"><a href="topics/impala_perf_joins.html">Join Performance</a></li><li class="topicref"><a 
href="topics/impala_perf_stats.html">Table and Column Statistics</a></li><li class="topicref"><a href="topics/impala_perf_benchmarking.html">Benchmarking</a></li><li class="topicref"><a href="topics/impala_perf_resources.html">Controlling Resource Usage</a></li><li class="topicref"><a href="topics/impala_runtime_filtering.html">Runtime Filtering</a></li><li class="topicref"><a href="topics/impala_perf_hdfs_caching.html">HDFS Caching</a></li><li class="topicref"><a href="topics/impala_perf_testing.html">Testing Impala Performance</a></li><li class="topicref"><a href="topics/impala_explain_plan.html">EXPLAIN Plans and Query Profiles</a></li><li class="topicref"><a href="topics/impala_perf_skew.html">HDFS Block Skew</a></li></ul></li><li class="topicref"><a href="topics/impala_scalability.html">Scalability Considerations</a></li><li class="topicref"><a href="topics/impala_partitioning.html">Partitioning</a></li><li class="topicref"><a href="topics/impala_file_formats.html">File Formats</a><ul><li class="topicref"><a href="topics/impala_txtfile.html">Text Data Files</a></li><li class="topicref"><a href="topics/impala_parquet.html">Parquet Data Files</a></li><li class="topicref"><a href="topics/impala_avro.html">Avro Data Files</a></li><li class="topicref"><a href="topics/impala_rcfile.html">RCFile Data Files</a></li><li class="topicref"><a href="topics/impala_seqfile.html">SequenceFile Data Files</a></li></ul></li><li class="topicref"><a href="topics/impala_kudu.html">Using Impala to Query Kudu Tables</a></li><li class="topicref"><a href="topics/impala_hbase.html">HBase Tables</a></li><li class="topicref"><a href="topics/impala_s3.html">S3 Tables</a></li><li class="topicref"><a href="topics/impala_adls.html">ADLS Tables</a></li><li class="topicref"><a 
href="topics/impala_isilon.html">Isilon Storage</a></li><li class="topicref"><a href="topics/impala_logging.html">Logging</a></li><li class="topicref"><a href="topics/impala_troubleshooting.html">Troubleshooting Impala</a><ul><li class="topicref"><a href="topics/impala_webui.html">Web User Interface</a></li><li class="topicref"><a href="topics/impala_breakpad.html">Breakpad Minidumps</a></li></ul></li><li class="topicref"><a href="topics/impala_ports.html">Ports Used by Impala</a></li><li class="topicref"><a href="topics/impala_reserved_words.html">Impala Reserved Words</a></li><li class="topicref"><a href="topics/impala_faq.html">Impala Frequently Asked Questions</a></li><li class="topicref"><a href="topics/impala_release_notes.html">Impala Release Notes</a><ul><li class="topicref"><a href="topics/impala_relnotes.html">Impala Release Notes</a></li><li class="topicref"><a href="topics/impala_new_features.html">New Features in Apache Impala (incubating)</a></li><li class="topicref"><a href="topics/impala_incompatible_changes.html">Incompatible Changes and Limitations in Apache Impala (incubating)</a></li><li class="topicref"><a href="topics/impala_known_issues.html">Known Issues and Workarounds in Impala</a></li><li class="topicref"><a href="topics/impala_fixed_issues.html">Fixed Issues in Apache Impala (incubating)</a></li></ul></li></ul></nav></body></html> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ae2f8d03/docs/build/html/topics/impala_adls.html ---------------------------------------------------------------------- diff --git a/docs/build/html/topics/impala_adls.html b/docs/build/html/topics/impala_adls.html new file mode 100644 index 0000000..2f10f7f --- /dev/null +++ b/docs/build/html/topics/impala_adls.html @@ -0,0 +1,645 @@ +<!DOCTYPE html + SYSTEM "about:legacy-compat"> +<html lang="en"><head><meta http-equiv="Content-Type" content="text/html; charset=UTF-8"><meta charset="UTF-8"><meta name="copyright" content="(C) Copyright 2017"><meta 
name="DC.rights.owner" content="(C) Copyright 2017"><meta name="DC.Type" content="concept"><meta name="prodname" content="Impala"><meta name="version" content="Impala 2.8.x"><meta name="DC.Format" content="XHTML"><meta name="DC.Identifier" content="adls"><link rel="stylesheet" type="text/css" href="../commonltr.css"><title>Using Impala with the Azure Data Lake Store (ADLS)</title></head><body id="adls"><main role="main"><article role="article" aria-labelledby="ariaid-title1"> + + <h1 class="title topictitle1" id="ariaid-title1">Using Impala with the Azure Data Lake Store (ADLS)</h1> + + + + <div class="body conbody"> + + <div class="note important note_important"><span class="note__title importanttitle">Important:</span> + <p class="p"> + Currently, the ADLS support in Impala is preliminary and not + fully tested. Do not use Impala with ADLS in a production environment. + </p> + </div> + + <p class="p"> + + You can use Impala to query data residing on the Azure Data Lake Store (ADLS) filesystem. + This capability allows convenient access to a storage system that is remotely managed, + accessible from anywhere, and integrated with various cloud-based services. Impala can + query files in any supported file format from ADLS. The ADLS storage location + can be for an entire table, or individual partitions in a partitioned table. + </p> + + <p class="p"> + The default Impala tables use data files stored on HDFS, which are ideal for bulk loads and queries using + full-table scans. In contrast, queries against ADLS data are less performant, making ADLS suitable for holding + <span class="q">"cold"</span> data that is only queried occasionally, while more frequently accessed <span class="q">"hot"</span> data resides in + HDFS. In a partitioned table, you can set the <code class="ph codeph">LOCATION</code> attribute for individual partitions + to put some partitions on HDFS and others on ADLS, typically depending on the age of the data. 
+ </p> + + <p class="p toc inpage"></p> + + </div> + + <article class="topic concept nested1" aria-labelledby="ariaid-title2" id="adls__prereqs"> + <h2 class="title topictitle2" id="ariaid-title2">Prerequisites</h2> + <div class="body conbody"> + <p class="p"> + These procedures presume that you have already set up an Azure account, + configured an ADLS store, and configured your Hadoop cluster with appropriate + credentials to be able to access ADLS. See the following resources for information: + </p> + <ul class="ul"> + <li class="li"> + <p class="p"> + <a class="xref" href="https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-get-started-portal" target="_blank">Get started with Azure Data Lake Store using the Azure Portal</a> + </p> + </li> + <li class="li"> + <p class="p"> + <a class="xref" href="https://hadoop.apache.org/docs/current2/hadoop-azure-datalake/index.html" target="_blank">Hadoop Azure Data Lake Support</a> + </p> + </li> + </ul> + </div> + </article> + + <article class="topic concept nested1" aria-labelledby="ariaid-title3" id="adls__sql"> + <h2 class="title topictitle2" id="ariaid-title3">How Impala SQL Statements Work with ADLS</h2> + <div class="body conbody"> + <p class="p"> + Impala SQL statements work with data on ADLS as follows: + </p> + <ul class="ul"> + <li class="li"> + <p class="p"> + The <a class="xref" href="impala_create_table.html#create_table">CREATE TABLE Statement</a> + or <a class="xref" href="impala_alter_table.html#alter_table">ALTER TABLE Statement</a> statements + can specify that a table resides on the ADLS filesystem by + encoding an <code class="ph codeph">adl://</code> prefix for the <code class="ph codeph">LOCATION</code> + property. <code class="ph codeph">ALTER TABLE</code> can also set the <code class="ph codeph">LOCATION</code> + property for an individual partition, so that some data in a table resides on + ADLS and other data in the same table resides on HDFS. 
+ </p> + <div class="p"> + The full format of the location URI is typically: +<pre class="pre codeblock"><code> +adl://<var class="keyword varname">your_account</var>.azuredatalakestore.net/<var class="keyword varname">rest_of_directory_path</var> +</code></pre> + </div> + </li> + <li class="li"> + <p class="p"> + Once a table or partition is designated as residing on ADLS, the <a class="xref" href="impala_select.html#select">SELECT Statement</a> + statement transparently accesses the data files from the appropriate storage layer. + </p> + </li> + <li class="li"> + <p class="p"> + If the ADLS table is an internal table, the <a class="xref" href="impala_drop_table.html#drop_table">DROP TABLE Statement</a> statement + removes the corresponding data files from ADLS when the table is dropped. + </p> + </li> + <li class="li"> + <p class="p"> + The <a class="xref" href="impala_truncate_table.html#truncate_table">TRUNCATE TABLE Statement (Impala 2.3 or higher only)</a> statement always removes the corresponding + data files from ADLS when the table is truncated. + </p> + </li> + <li class="li"> + <p class="p"> + The <a class="xref" href="impala_load_data.html#load_data">LOAD DATA Statement</a> can move data files residing in HDFS into + an ADLS table. + </p> + </li> + <li class="li"> + <p class="p"> + The <a class="xref" href="impala_insert.html#insert">INSERT Statement</a>, or the <code class="ph codeph">CREATE TABLE AS SELECT</code> + form of the <code class="ph codeph">CREATE TABLE</code> statement, can copy data from an HDFS table or another ADLS + table into an ADLS table. + </p> + </li> + </ul> + <p class="p"> + For usage information about Impala SQL statements with ADLS tables, see <a class="xref" href="impala_adls.html#ddl">Creating Impala Databases, Tables, and Partitions for Data Stored on ADLS</a> + and <a class="xref" href="impala_adls.html#dml">Using Impala DML Statements for ADLS Data</a>. 
+ </p> + </div> + </article> + + <article class="topic concept nested1" aria-labelledby="ariaid-title4" id="adls__creds"> + + <h2 class="title topictitle2" id="ariaid-title4">Specifying Impala Credentials to Access Data in ADLS</h2> + + <div class="body conbody"> + + <p class="p"> + To allow Impala to access data in ADLS, specify values for the following configuration settings in your + <span class="ph filepath">core-site.xml</span> file: + </p> + +<pre class="pre codeblock"><code> +<property> + <name>dfs.adls.oauth2.access.token.provider.type</name> + <value>ClientCredential</value> +</property> +<property> + <name>dfs.adls.oauth2.client.id</name> + <value><varname>your_client_id</varname></value> +</property> +<property> + <name>dfs.adls.oauth2.credential</name> + <value><varname>your_client_secret</varname></value> +</property> +<property> + <name>dfs.adls.oauth2.refresh.url</name> + <value><varname>refresh_URL</varname></value> +</property> + +</code></pre> + + <div class="note note note_note"><span class="note__title notetitle">Note:</span> + <p class="p"> + Check if your Hadoop distribution or cluster management tool includes support for + filling in and distributing credentials across the cluster in an automated way. + </p> + </div> + + <p class="p"> + After specifying the credentials, restart both the Impala and + Hive services. (Restarting Hive is required because Impala queries, CREATE TABLE statements, and so on go + through the Hive metastore.) + </p> + + </div> + + </article> + + <article class="topic concept nested1" aria-labelledby="ariaid-title5" id="adls__etl"> + + <h2 class="title topictitle2" id="ariaid-title5">Loading Data into ADLS for Impala Queries</h2> + + + <div class="body conbody"> + + <p class="p"> + If your ETL pipeline involves moving data into ADLS and then querying through Impala, + you can either use Impala DML statements to create, move, or copy the data, or + use the same data loading techniques as you would for non-Impala data. 
+ </p> + + </div> + + <article class="topic concept nested2" aria-labelledby="ariaid-title6" id="etl__dml"> + <h3 class="title topictitle3" id="ariaid-title6">Using Impala DML Statements for ADLS Data</h3> + <div class="body conbody"> + <p class="p"> + In <span class="keyword">Impala 2.9</span> and higher, the Impala DML statements (<code class="ph codeph">INSERT</code>, <code class="ph codeph">LOAD DATA</code>, + and <code class="ph codeph">CREATE TABLE AS SELECT</code>) can write data into a table or partition that resides in the + Azure Data Lake Store (ADLS). + The syntax of the DML statements is the same as for any other tables, because the ADLS location for tables and + partitions is specified by an <code class="ph codeph">adl://</code> prefix in the + <code class="ph codeph">LOCATION</code> attribute of + <code class="ph codeph">CREATE TABLE</code> or <code class="ph codeph">ALTER TABLE</code> statements. + If you bring data into ADLS using the normal ADLS transfer mechanisms instead of Impala DML statements, + issue a <code class="ph codeph">REFRESH</code> statement for the table before using Impala to query the ADLS data. + </p> + </div> + </article> + + <article class="topic concept nested2" aria-labelledby="ariaid-title7" id="etl__manual_etl"> + <h3 class="title topictitle3" id="ariaid-title7">Manually Loading Data into Impala Tables on ADLS</h3> + <div class="body conbody"> + <p class="p"> + As an alternative, you can use the Microsoft-provided methods to bring data files + into ADLS for querying through Impala. See + <a class="xref" href="https://docs.microsoft.com/en-us/azure/data-lake-store/data-lake-store-copy-data-azure-storage-blob" target="_blank">the Microsoft ADLS documentation</a> + for details. 
+ </p> + + <p class="p"> + After you upload data files to a location already mapped to an Impala table or partition, or if you delete + files in ADLS from such a location, issue the <code class="ph codeph">REFRESH <var class="keyword varname">table_name</var></code> + statement to make Impala aware of the new set of data files. + </p> + + </div> + </article> + + </article> + + <article class="topic concept nested1" aria-labelledby="ariaid-title8" id="adls__ddl"> + + <h2 class="title topictitle2" id="ariaid-title8">Creating Impala Databases, Tables, and Partitions for Data Stored on ADLS</h2> + + + <div class="body conbody"> + + <p class="p"> + Impala reads data for a table or partition from ADLS based on the <code class="ph codeph">LOCATION</code> attribute for the + table or partition. Specify the ADLS details in the <code class="ph codeph">LOCATION</code> clause of a <code class="ph codeph">CREATE + TABLE</code> or <code class="ph codeph">ALTER TABLE</code> statement. The notation for the <code class="ph codeph">LOCATION</code> + clause is <code class="ph codeph">adl://<var class="keyword varname">store</var>/<var class="keyword varname">path/to/file</var></code>. + </p> + + <p class="p"> + For a partitioned table, either specify a separate <code class="ph codeph">LOCATION</code> clause for each new partition, + or specify a base <code class="ph codeph">LOCATION</code> for the table and set up a directory structure in ADLS to mirror + the way Impala partitioned tables are structured in HDFS. Although, strictly speaking, ADLS filenames do not + have directory paths, Impala treats ADLS filenames with <code class="ph codeph">/</code> characters the same as HDFS + pathnames that include directories. + </p> + + <p class="p"> + To point a nonpartitioned table or an individual partition at ADLS, specify a single directory + path in ADLS, which could be any arbitrary directory. 
To replicate the structure of an entire Impala + partitioned table or database in ADLS requires more care, with directories and subdirectories nested and + named to match the equivalent directory tree in HDFS. Consider setting up an empty staging area if + necessary in HDFS, and recording the complete directory structure so that you can replicate it in ADLS. + </p> + + <p class="p"> + For example, the following session creates a partitioned table where only a single partition resides on ADLS. + The partitions for years 2013 and 2014 are located on HDFS. The partition for year 2015 includes a + <code class="ph codeph">LOCATION</code> attribute with an <code class="ph codeph">adl://</code> URL, and so refers to data residing on + ADLS, under a specific path underneath the store <code class="ph codeph">impalademo</code>. + </p> + +<pre class="pre codeblock"><code>[localhost:21000] > create database db_on_hdfs; +[localhost:21000] > use db_on_hdfs; +[localhost:21000] > create table mostly_on_hdfs (x int) partitioned by (year int); +[localhost:21000] > alter table mostly_on_hdfs add partition (year=2013); +[localhost:21000] > alter table mostly_on_hdfs add partition (year=2014); +[localhost:21000] > alter table mostly_on_hdfs add partition (year=2015) + > location 'adl://impalademo.azuredatalakestore.net/dir1/dir2/dir3/t1'; +</code></pre> + + <p class="p"> + For convenience when working with multiple tables with data files stored in ADLS, you can create a database + with a <code class="ph codeph">LOCATION</code> attribute pointing to an ADLS path. + Specify a URL of the form <code class="ph codeph">adl://<var class="keyword varname">store</var>/<var class="keyword varname">root/path/for/database</var></code> + for the <code class="ph codeph">LOCATION</code> attribute of the database. + Any tables created inside that database + automatically create directories underneath the one specified by the database + <code class="ph codeph">LOCATION</code> attribute. 
+ </p> + + <p class="p"> + The following session creates a database and two partitioned tables residing entirely on ADLS, one + partitioned by a single column and the other partitioned by multiple columns. Because a + <code class="ph codeph">LOCATION</code> attribute with an <code class="ph codeph">adl://</code> URL is specified for the database, the + tables inside that database are automatically created on ADLS underneath the database directory. To see the + names of the associated subdirectories, including the partition key values, we use an ADLS client tool to + examine how the directory structure is organized on ADLS. For example, Impala partition directories such as + <code class="ph codeph">month=1</code> do not include leading zeroes, which sometimes appear in partition directories created + through Hive. + </p> + +<pre class="pre codeblock"><code>[localhost:21000] > create database db_on_adls location 'adl://impalademo.azuredatalakestore.net/dir1/dir2/dir3'; +[localhost:21000] > use db_on_adls; + +[localhost:21000] > create table partitioned_on_adls (x int) partitioned by (year int); +[localhost:21000] > alter table partitioned_on_adls add partition (year=2013); +[localhost:21000] > alter table partitioned_on_adls add partition (year=2014); +[localhost:21000] > alter table partitioned_on_adls add partition (year=2015); + +[localhost:21000] > ! 
hadoop fs -ls -R adl://impalademo.azuredatalakestore.net/dir1/dir2/dir3; +2015-03-17 13:56:34 0 dir1/dir2/dir3/ +2015-03-17 16:43:28 0 dir1/dir2/dir3/partitioned_on_adls/ +2015-03-17 16:43:49 0 dir1/dir2/dir3/partitioned_on_adls/year=2013/ +2015-03-17 16:43:53 0 dir1/dir2/dir3/partitioned_on_adls/year=2014/ +2015-03-17 16:43:58 0 dir1/dir2/dir3/partitioned_on_adls/year=2015/ + +[localhost:21000] > create table partitioned_multiple_keys (x int) + > partitioned by (year smallint, month tinyint, day tinyint); +[localhost:21000] > alter table partitioned_multiple_keys + > add partition (year=2015,month=1,day=1); +[localhost:21000] > alter table partitioned_multiple_keys + > add partition (year=2015,month=1,day=31); +[localhost:21000] > alter table partitioned_multiple_keys + > add partition (year=2015,month=2,day=28); + +[localhost:21000] > ! hadoop fs -ls -R adl://impalademo.azuredatalakestore.net/dir1/dir2/dir3; +2015-03-17 13:56:34 0 dir1/dir2/dir3/ +2015-03-17 16:47:13 0 dir1/dir2/dir3/partitioned_multiple_keys/ +2015-03-17 16:47:44 0 dir1/dir2/dir3/partitioned_multiple_keys/year=2015/month=1/day=1/ +2015-03-17 16:47:50 0 dir1/dir2/dir3/partitioned_multiple_keys/year=2015/month=1/day=31/ +2015-03-17 16:47:57 0 dir1/dir2/dir3/partitioned_multiple_keys/year=2015/month=2/day=28/ +2015-03-17 16:43:28 0 dir1/dir2/dir3/partitioned_on_adls/ +2015-03-17 16:43:49 0 dir1/dir2/dir3/partitioned_on_adls/year=2013/ +2015-03-17 16:43:53 0 dir1/dir2/dir3/partitioned_on_adls/year=2014/ +2015-03-17 16:43:58 0 dir1/dir2/dir3/partitioned_on_adls/year=2015/ +</code></pre> + + <p class="p"> + The <code class="ph codeph">CREATE DATABASE</code> and <code class="ph codeph">CREATE TABLE</code> statements create the associated + directory paths if they do not already exist. You can specify multiple levels of directories, and the + <code class="ph codeph">CREATE</code> statement creates all appropriate levels, similar to using <code class="ph codeph">mkdir + -p</code>. 
+ </p> + + <p class="p"> + Use the standard ADLS file upload methods to actually put the data files into the right locations. You can + also put the directory paths and data files in place before creating the associated Impala databases or + tables, and Impala automatically uses the data from the appropriate location after the associated databases + and tables are created. + </p> + + <p class="p"> + You can switch whether an existing table or partition points to data in HDFS or ADLS. For example, if you + have an Impala table or partition pointing to data files in HDFS or ADLS, and you later transfer those data + files to the other filesystem, use an <code class="ph codeph">ALTER TABLE</code> statement to adjust the + <code class="ph codeph">LOCATION</code> attribute of the corresponding table or partition to reflect that change. Because + Impala does not have an <code class="ph codeph">ALTER DATABASE</code> statement, this location-switching technique is not + practical for entire databases that have a custom <code class="ph codeph">LOCATION</code> attribute. + </p> + + </div> + + </article> + + <article class="topic concept nested1" aria-labelledby="ariaid-title9" id="adls__internal_external"> + + <h2 class="title topictitle2" id="ariaid-title9">Internal and External Tables Located on ADLS</h2> + + <div class="body conbody"> + + <p class="p"> + Just as with tables located on HDFS storage, you can designate ADLS-based tables as either internal (managed + by Impala) or external, by using the syntax <code class="ph codeph">CREATE TABLE</code> or <code class="ph codeph">CREATE EXTERNAL + TABLE</code> respectively. When you drop an internal table, the files associated with the table are + removed, even if they are on ADLS storage. When you drop an external table, the files associated with the + table are left alone, and are still available for access by other tools or components. 
See + <a class="xref" href="impala_tables.html#tables">Overview of Impala Tables</a> for details. + </p> + + <p class="p"> + If the data on ADLS is intended to be long-lived and accessed by other tools in addition to Impala, create + any associated ADLS tables with the <code class="ph codeph">CREATE EXTERNAL TABLE</code> syntax, so that the files are not + deleted from ADLS when the table is dropped. + </p> + + <p class="p"> + If the data on ADLS is only needed for querying by Impala and can be safely discarded once the Impala + workflow is complete, create the associated ADLS tables using the <code class="ph codeph">CREATE TABLE</code> syntax, so + that dropping the table also deletes the corresponding data files on ADLS. + </p> + + <p class="p"> + For example, this session creates a table in ADLS with the same column layout as a table in HDFS, then + examines the ADLS table and queries some data from it. The table in ADLS works the same as a table in HDFS as + far as the expected file format of the data, table and column statistics, and other table properties. The + only indication that it is not an HDFS table is the <code class="ph codeph">adl://</code> URL in the + <code class="ph codeph">LOCATION</code> property. Many data files can reside in the ADLS directory, and their combined + contents form the table data. Because the data in this example is uploaded after the table is created, a + <code class="ph codeph">REFRESH</code> statement prompts Impala to update its cached information about the data files. 
+ </p> + +<pre class="pre codeblock"><code>[localhost:21000] > create table usa_cities_adls like usa_cities location 'adl://impalademo.azuredatalakestore.net/usa_cities'; +[localhost:21000] > desc usa_cities_adls; ++-------+----------+---------+ +| name | type | comment | ++-------+----------+---------+ +| id | smallint | | +| city | string | | +| state | string | | ++-------+----------+---------+ + +-- Now from a web browser, upload the same data file(s) to ADLS as in the HDFS table, +-- under the relevant store and path. If you already have the data in ADLS, you would +-- point the table LOCATION at an existing path. + +[localhost:21000] > refresh usa_cities_adls; +[localhost:21000] > select count(*) from usa_cities_adls; ++----------+ +| count(*) | ++----------+ +| 289 | ++----------+ +[localhost:21000] > select distinct state from usa_cities_adls limit 5; ++----------------------+ +| state | ++----------------------+ +| Louisiana | +| Minnesota | +| Georgia | +| Alaska | +| Ohio | ++----------------------+ +[localhost:21000] > desc formatted usa_cities_adls; ++------------------------------+----------------------------------------------------+---------+ +| name | type | comment | ++------------------------------+----------------------------------------------------+---------+ +| # col_name | data_type | comment | +| | NULL | NULL | +| id | smallint | NULL | +| city | string | NULL | +| state | string | NULL | +| | NULL | NULL | +| # Detailed Table Information | NULL | NULL | +| Database: | adls_testing | NULL | +| Owner: | jrussell | NULL | +| CreateTime: | Mon Mar 16 11:36:25 PDT 2017 | NULL | +| LastAccessTime: | UNKNOWN | NULL | +| Protect Mode: | None | NULL | +| Retention: | 0 | NULL | +| Location: | adl://impalademo.azuredatalakestore.net/usa_cities | NULL | +| Table Type: | MANAGED_TABLE | NULL | +... 
++------------------------------+----------------------------------------------------+---------+ +</code></pre> + + <p class="p"> + In this case, we have already uploaded a Parquet file with a million rows of data to the + <code class="ph codeph">sample_data</code> directory underneath the <code class="ph codeph">impalademo</code> store on ADLS. This + session creates a table with matching column settings pointing to the corresponding location in ADLS, then + queries the table. Because the data is already in place on ADLS when the table is created, no + <code class="ph codeph">REFRESH</code> statement is required. + </p> + +<pre class="pre codeblock"><code>[localhost:21000] > create table sample_data_adls + > (id int, id bigint, val int, zerofill string, + > name string, assertion boolean, city string, state string) + > stored as parquet location 'adl://impalademo.azuredatalakestore.net/sample_data'; +[localhost:21000] > select count(*) from sample_data_adls; ++----------+ +| count(*) | ++----------+ +| 1000000 | ++----------+ +[localhost:21000] > select count(*) howmany, assertion from sample_data_adls group by assertion; ++---------+-----------+ +| howmany | assertion | ++---------+-----------+ +| 667149 | true | +| 332851 | false | ++---------+-----------+ +</code></pre> + + </div> + + </article> + + <article class="topic concept nested1" aria-labelledby="ariaid-title10" id="adls__queries"> + + <h2 class="title topictitle2" id="ariaid-title10">Running and Tuning Impala Queries for Data Stored on ADLS</h2> + + <div class="body conbody"> + + <p class="p"> + Once the appropriate <code class="ph codeph">LOCATION</code> attributes are set up at the table or partition level, you + query data stored in ADLS exactly the same as data stored on HDFS or in HBase: + </p> + + <ul class="ul"> + <li class="li"> + Queries against ADLS data support all the same file formats as for HDFS data. + </li> + + <li class="li"> + Tables can be unpartitioned or partitioned. 
For partitioned tables, either manually construct paths in ADLS + corresponding to the HDFS directories representing partition key values, or use <code class="ph codeph">ALTER TABLE ... + ADD PARTITION</code> to set up the appropriate paths in ADLS. + </li> + + <li class="li"> + HDFS, Kudu, and HBase tables can be joined to ADLS tables, or ADLS tables can be joined with each other. + </li> + + <li class="li"> + Authorization using the Sentry framework to control access to databases, tables, or columns works the + same whether the data is in HDFS or in ADLS. + </li> + + <li class="li"> + The <span class="keyword cmdname">catalogd</span> daemon caches metadata for both HDFS and ADLS tables. Use + <code class="ph codeph">REFRESH</code> and <code class="ph codeph">INVALIDATE METADATA</code> for ADLS tables in the same situations + where you would issue those statements for HDFS tables. + </li> + + <li class="li"> + Queries against ADLS tables are subject to the same kinds of admission control and resource management as + HDFS tables. + </li> + + <li class="li"> + Metadata about ADLS tables is stored in the same metastore database as for HDFS tables. + </li> + + <li class="li"> + You can set up views referring to ADLS tables, the same as for HDFS tables. + </li> + + <li class="li"> + The <code class="ph codeph">COMPUTE STATS</code>, <code class="ph codeph">SHOW TABLE STATS</code>, and <code class="ph codeph">SHOW COLUMN + STATS</code> statements work for ADLS tables also. + </li> + </ul> + + </div> + + <article class="topic concept nested2" aria-labelledby="ariaid-title11" id="queries__performance"> + + <h3 class="title topictitle3" id="ariaid-title11">Understanding and Tuning Impala Query Performance for ADLS Data</h3> + + + <div class="body conbody"> + + <p class="p"> + Although Impala queries for data stored in ADLS might be less performant than queries against the + equivalent data stored in HDFS, you can still do some tuning. 
Here are techniques you can use to + interpret explain plans and profiles for queries against ADLS data, and tips to achieve the best + performance possible for such queries. + </p> + + <p class="p"> + All else being equal, performance is expected to be lower for queries running against data on ADLS rather + than HDFS. The actual mechanics of the <code class="ph codeph">SELECT</code> statement are somewhat different when the + data is in ADLS. Although the work is still distributed across the datanodes of the cluster, Impala might + parallelize the work for a distributed query differently for data on HDFS and ADLS. ADLS does not have the + same block notion as HDFS, so Impala uses heuristics to determine how to split up large ADLS files for + processing in parallel. Because all hosts can access any ADLS data file with equal efficiency, the + distribution of work might be different than for HDFS data, where the data blocks are physically read + using short-circuit local reads by hosts that contain the appropriate block replicas. Although the I/O to + read the ADLS data might be spread evenly across the hosts of the cluster, the fact that all data is + initially retrieved across the network means that the overall query performance is likely to be lower for + ADLS data than for HDFS data. + </p> + + <p class="p"> + Because ADLS does not expose the block sizes of data files the way HDFS does, + any Impala <code class="ph codeph">INSERT</code> or <code class="ph codeph">CREATE TABLE AS SELECT</code> statements + use the <code class="ph codeph">PARQUET_FILE_SIZE</code> query option setting to define the size of + Parquet data files. (Using a large block size is more important for Parquet tables than + for tables that use other file formats.) + </p> + + <p class="p"> + When optimizing aspects of complex queries such as the join order, Impala treats tables on HDFS and + ADLS the same way. 
Therefore, follow all the same tuning recommendations for ADLS tables as for HDFS ones, + such as using the <code class="ph codeph">COMPUTE STATS</code> statement to help Impala construct accurate estimates of + row counts and cardinality. See <a class="xref" href="impala_performance.html#performance">Tuning Impala for Performance</a> for details. + </p> + + <p class="p"> + In query profile reports, the numbers for <code class="ph codeph">BytesReadLocal</code>, + <code class="ph codeph">BytesReadShortCircuit</code>, <code class="ph codeph">BytesReadDataNodeCached</code>, and + <code class="ph codeph">BytesReadRemoteUnexpected</code> are blank because those metrics come from HDFS. + If you do see any indications that a query against an ADLS table performed <span class="q">"remote read"</span> + operations, do not be alarmed. That is expected because, by definition, all the I/O for ADLS tables involves + remote reads. + </p> + + </div> + + </article> + + </article> + + <article class="topic concept nested1" aria-labelledby="ariaid-title12" id="adls__restrictions"> + + <h2 class="title topictitle2" id="ariaid-title12">Restrictions on Impala Support for ADLS</h2> + + <div class="body conbody"> + + <p class="p"> + Impala requires that the default filesystem for the cluster be HDFS. You cannot use ADLS as the only + filesystem in the cluster. + </p> + + <p class="p"> + Although ADLS is often used to store JSON-formatted data, the current Impala support for ADLS does not include + directly querying JSON data. For Impala queries, use data files in one of the file formats listed in + <a class="xref" href="impala_file_formats.html#file_formats">How Impala Works with Hadoop File Formats</a>. If you have data in JSON format, you can prepare a + flattened version of that data for querying by Impala as part of your ETL cycle. + </p> + + <p class="p"> + You cannot use the <code class="ph codeph">ALTER TABLE ... 
SET CACHED</code> statement for tables or partitions that are + located in ADLS. + </p> + + </div> + + </article> + + <article class="topic concept nested1" aria-labelledby="ariaid-title13" id="adls__best_practices"> + <h2 class="title topictitle2" id="ariaid-title13">Best Practices for Using Impala with ADLS</h2> + + <div class="body conbody"> + <p class="p"> + The following guidelines represent best practices derived from testing and real-world experience with Impala on ADLS: + </p> + <ul class="ul"> + <li class="li"> + <p class="p"> + Any reference to an ADLS location must be fully qualified. (This rule applies when + ADLS is not designated as the default filesystem.) + </p> + </li> + <li class="li"> + <p class="p"> + Set any appropriate configuration settings for <span class="keyword cmdname">impalad</span>. + </p> + </li> + </ul> + + </div> + </article> + +</article></main></body></html> \ No newline at end of file http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ae2f8d03/docs/build/html/topics/impala_alter_table.html ---------------------------------------------------------------------- diff --git a/docs/build/html/topics/impala_alter_table.html b/docs/build/html/topics/impala_alter_table.html index 5337a50..6d01d0e 100644 --- a/docs/build/html/topics/impala_alter_table.html +++ b/docs/build/html/topics/impala_alter_table.html @@ -57,7 +57,10 @@ statsKey ::= numDVs | numNulls | avgSize | maxSize</span> <var class="keyword varname">new_name</var> ::= [<var class="keyword varname">new_database</var>.]<var class="keyword varname">new_table_name</var> -<var class="keyword varname">col_spec</var> ::= <var class="keyword varname">col_name</var> <var class="keyword varname">type_name</var> +<var class="keyword varname">col_spec</var> ::= <var class="keyword varname">col_name</var> <var class="keyword varname">type_name</var> <span class="ph">[<var class="keyword varname">kudu_attributes</var>]</span> + +<span class="ph"><var class="keyword 
varname">kudu_attributes</var> ::= { [NOT] NULL | ENCODING <var class="keyword varname">codec</var> | COMPRESSION <var class="keyword varname">algorithm</var> | + DEFAULT <var class="keyword varname">constant</var> | BLOCK_SIZE <var class="keyword varname">number</var> }</span> <var class="keyword varname">partition_spec</var> ::= <var class="keyword varname">simple_partition_spec</var> | <span class="ph"><var class="keyword varname">complex_partition_spec</var></span> @@ -465,7 +468,7 @@ yes,no</code></pre> See <a class="xref" href="impala_perf_stats.html#perf_table_stats_manual">Setting the NUMROWS Value Manually through ALTER TABLE</a> for an example of using table properties to fine-tune the performance-related table statistics. </p> - + <p class="p"> <strong class="ph b">To manually set or update table or column statistics:</strong> </p> @@ -701,7 +704,7 @@ select * from p2; alter table p2 drop column x; select * from p2; -WARNINGS: +WARNINGS: File '<var class="keyword varname">hdfs_filename</var>' has an incompatible Parquet schema for column 'add_columns.p2.s3'. Column type: STRING, Parquet schema: optional int32 x [i:1 d:1 r:0] @@ -966,6 +969,12 @@ ALTER TABLE <var class="keyword varname">table_name</var> SET TBLPROPERTIES('EXT </li> <li class="li"> <p class="p"> + In <span class="keyword">Impala 2.9</span> and higher, you can also specify the <code class="ph codeph">ENCODING</code>, + <code class="ph codeph">COMPRESSION</code>, and <code class="ph codeph">BLOCK_SIZE</code> attributes when adding a column. + </p> + </li> + <li class="li"> + <p class="p"> If you add a column with a <code class="ph codeph">NOT NULL</code> attribute, it must also have a <code class="ph codeph">DEFAULT</code> attribute, so the default value can be assigned to that column for all existing rows. 
@@ -985,12 +994,6 @@ ALTER TABLE <var class="keyword varname">table_name</var> SET TBLPROPERTIES('EXT </li> <li class="li"> <p class="p"> - You cannot assign the <code class="ph codeph">ENCODING</code>, <code class="ph codeph">COMPRESSION</code>, - or <code class="ph codeph">BLOCK_SIZE</code> attributes when adding a column. - </p> - </li> - <li class="li"> - <p class="p"> You cannot change the default value, nullability, encoding, compression, or block size of existing columns in a Kudu table. </p> @@ -1013,6 +1016,20 @@ ALTER TABLE <var class="keyword varname">table_name</var> SET TBLPROPERTIES('EXT </div> <p class="p"> + The following are some examples of using the <code class="ph codeph">ADD COLUMNS</code> clause for a Kudu table: + </p> + +<pre class="pre codeblock"><code> +CREATE TABLE t1 ( x INT, PRIMARY KEY (x) ) + PARTITION BY HASH (x) PARTITIONS 16 + STORED AS KUDU; + +ALTER TABLE t1 ADD COLUMNS (y STRING ENCODING prefix_encoding); +ALTER TABLE t1 ADD COLUMNS (z INT DEFAULT 10); +ALTER TABLE t1 ADD COLUMNS (a STRING NOT NULL DEFAULT '', t TIMESTAMP COMPRESSION default_compression); +</code></pre> + + <p class="p"> Kudu tables all use an underlying partitioning mechanism. The partition syntax is different than for non-Kudu tables. You can use the <code class="ph codeph">ALTER TABLE</code> statement to add and drop <dfn class="term">range partitions</dfn> from a Kudu table. Any new range must not overlap with any existing ranges. 
Dropping a range removes all the associated http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ae2f8d03/docs/build/html/topics/impala_appx_median.html ---------------------------------------------------------------------- diff --git a/docs/build/html/topics/impala_appx_median.html b/docs/build/html/topics/impala_appx_median.html index 1883f2c..25085b4 100644 --- a/docs/build/html/topics/impala_appx_median.html +++ b/docs/build/html/topics/impala_appx_median.html @@ -58,6 +58,11 @@ </p> <p class="p"> + The <code class="ph codeph">APPX_MEDIAN</code> function returns only the first 10 characters for + string values (string, varchar, char). Additional characters are truncated. + </p> + + <p class="p"> <strong class="ph b">Examples:</strong> </p> http://git-wip-us.apache.org/repos/asf/incubator-impala/blob/ae2f8d03/docs/build/html/topics/impala_array.html ---------------------------------------------------------------------- diff --git a/docs/build/html/topics/impala_array.html b/docs/build/html/topics/impala_array.html index 45c9a42..960e337 100644 --- a/docs/build/html/topics/impala_array.html +++ b/docs/build/html/topics/impala_array.html @@ -118,7 +118,7 @@ type ::= <var class="keyword varname">primitive_type</var> | <var class="keyword </p> </li> <li class="li"> - <p class="p" id="array__d6e2889"> + <p class="p" id="array__d6e3003"> The maximum length of the column definition for any complex type, including declarations for any nested types, is 4000 characters. 
</p> @@ -135,7 +135,7 @@ type ::= <var class="keyword varname">primitive_type</var> | <var class="keyword <strong class="ph b">Kudu considerations:</strong> </p> <p class="p"> - Currently, the data types <code class="ph codeph">DECIMAL</code>, <code class="ph codeph">TIMESTAMP</code>, <code class="ph codeph">CHAR</code>, <code class="ph codeph">VARCHAR</code>, + Currently, the data types <code class="ph codeph">DECIMAL</code>, <code class="ph codeph">CHAR</code>, <code class="ph codeph">VARCHAR</code>, <code class="ph codeph">ARRAY</code>, <code class="ph codeph">MAP</code>, and <code class="ph codeph">STRUCT</code> cannot be used with Kudu tables. </p>
