This is an automated email from the ASF dual-hosted git repository.

stigahuang pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit 22ac00fd4d0fd023a743be3b047258bdce03d3f4
Author: Sahil Takiar <[email protected]>
AuthorDate: Tue Oct 13 14:46:29 2020 -0700

    IMPALA-9910: [DOCS] Add fault tolerance docs
    
    Adds a few basic docs for fault tolerance in Impala. Covers the
    following topics:
    * Transparent query retries
    * Node blacklisting
    * Statestore heartbeats
    
    This commit only adds a high level explanation of the aforementioned
    fault tolerance concepts. The docs should be expanded on in a future
    commit.
    
    Change-Id: I9d178b21a9654bbed8b814ccadca95703ffacb62
    Reviewed-on: http://gerrit.cloudera.org:8080/16610
    Reviewed-by: Impala Public Jenkins <[email protected]>
    Tested-by: Impala Public Jenkins <[email protected]>
---
 docs/impala.ditamap                              |  5 ++
 docs/impala_keydefs.ditamap                      |  1 +
 docs/topics/impala_fault_tolerance.xml           | 60 ++++++++++++++++++++++++
 docs/topics/impala_node_blacklisting.xml         | 34 ++++++++++++++
 docs/topics/impala_retry_failed_queries.xml      | 52 ++++++++++++++++++++
 docs/topics/impala_spool_query_results.xml       |  2 +-
 docs/topics/impala_transparent_query_retries.xml | 56 ++++++++++++++++++++++
 7 files changed, 209 insertions(+), 1 deletion(-)

diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index 9407167..d6aeedd 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -233,6 +233,7 @@ under the License.
           <topicref rev="2.7.0" href="topics/impala_replica_preference.xml"/>
           <topicref href="topics/impala_request_pool.xml"/>
           <topicref href="topics/impala_resource_trace_ratio.xml"/>
+          <topicref rev="4.0.0" href="topics/impala_retry_failed_queries.xml"/>
           <topicref rev="2.5.0" href="topics/impala_runtime_bloom_filter_size.xml"/>
           <topicref rev="2.6.0" href="topics/impala_runtime_filter_max_size.xml"/>
           <topicref rev="2.6.0" href="topics/impala_runtime_filter_min_size.xml"/>
@@ -339,6 +340,10 @@ under the License.
     <topicref href="topics/impala_jdbc.xml"/>
     <topicref href="topics/impala_query_results_spooling.xml"/>
   </topicref>
+  <topicref href="topics/impala_fault_tolerance.xml">
+    <topicref href="topics/impala_transparent_query_retries.xml"/>
+    <topicref href="topics/impala_node_blacklisting.xml"/>
+  </topicref>
   <topicref href="topics/impala_troubleshooting.xml">
     <topicref href="topics/impala_webui.xml"/>
     <topicref href="topics/impala_breakpad.xml"/>
diff --git a/docs/impala_keydefs.ditamap b/docs/impala_keydefs.ditamap
index 594fa4d..0cb8a0b 100644
--- a/docs/impala_keydefs.ditamap
+++ b/docs/impala_keydefs.ditamap
@@ -10521,6 +10521,7 @@ under the License.
  <keydef href="https://issues.apache.org/jira/browse/IMPALA-9999" scope="external" format="html" keys="IMPALA-9999"/>
 
 <!-- Short form of mapping from Impala release to vendor-specific releases, for use in headings. -->
+  <keydef keys="impala40"><topicmeta><keywords><keyword>Impala 4.0</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala34"><topicmeta><keywords><keyword>Impala 3.4</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala33"><topicmeta><keywords><keyword>Impala 3.3</keyword></keywords></topicmeta></keydef>
   <keydef keys="impala32"><topicmeta><keywords><keyword>Impala 3.2</keyword></keywords></topicmeta></keydef>
diff --git a/docs/topics/impala_fault_tolerance.xml b/docs/topics/impala_fault_tolerance.xml
new file mode 100644
index 0000000..63b77d8
--- /dev/null
+++ b/docs/topics/impala_fault_tolerance.xml
@@ -0,0 +1,60 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="fault_tolerance">
+
+  <title>Impala Fault Tolerance</title>
+
+  <titlealts audience="PDF">
+
+    <navtitle>Fault Tolerance</navtitle>
+
+  </titlealts>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Fault Tolerance"/>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      This section describes the fault tolerance mechanisms built into
+      Impala. Fault tolerance allows Impala to continue to operate in the
+      face of hardware or process faults.
+    </p>
+
+    <p>
+      Impala uses the statestored process to provide cluster membership for
+      an Impala cluster. This is one of the core ways Impala provides fault
+      tolerance. The statestore periodically sends heartbeats (RPCs) to each
+      impalad process. If an impalad stops responding to heartbeats, the
+      statestored will consider the impalad as failed, and it will remove it
+      from the cluster membership state. The updates to the cluster
+      membership will be broadcast to all impalads.
+    </p>
+
+  </conbody>
+
+</concept>
diff --git a/docs/topics/impala_node_blacklisting.xml b/docs/topics/impala_node_blacklisting.xml
new file mode 100644
index 0000000..1b0034d
--- /dev/null
+++ b/docs/topics/impala_node_blacklisting.xml
@@ -0,0 +1,34 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="node_blacklisting">
+  <title>Impala Node Blacklisting</title>
+  <conbody>
+    <p>Node Blacklisting allows Impala Coordinators to be more aggressive
+      about deciding that an executor is unhealthy or unavailable, to
+      minimize failed queries in environments where cluster membership may be
+      more variable, rather than having to wait on the statestore heartbeat
+      mechanism to decide that the executor is down.</p>
+    <p>Node blacklists are local to a coordinator. A node is put on the
+      blacklist based on information from failed queries. Nodes are only
+      blacklisted temporarily, and are taken off the blacklist after a
+      certain period of time.</p>
+  </conbody>
+</concept>
diff --git a/docs/topics/impala_retry_failed_queries.xml b/docs/topics/impala_retry_failed_queries.xml
new file mode 100644
index 0000000..e20a327
--- /dev/null
+++ b/docs/topics/impala_retry_failed_queries.xml
@@ -0,0 +1,52 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="RETRY_FAILED_QUERIES" rev="4.0.0">
+  <title>RETRY_FAILED_QUERIES Query Option</title>
+  <titlealts audience="PDF">
+    <navtitle>RETRY_FAILED_QUERIES</navtitle>
+  </titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Querying"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Administrators"/>
+    </metadata>
+  </prolog>
+  <conbody>
+    <p>Use the <codeph>RETRY_FAILED_QUERIES</codeph> query option to control
+      whether or not queries are transparently retried on cluster membership
+      changes. </p>
+    <p>Transparent query retries will automatically trigger retries of queries
+      that fail because of a change in the Impala cluster membership. Cluster
+      membership changes typically occur when an impalad crashes, or if the
+      node is blacklisted by the Impala Coordinator.</p>
+    <p><b>Type:</b>
+      <codeph>BOOLEAN</codeph></p>
+    <p><b>Default:</b>
+      <codeph>FALSE</codeph></p>
+    <p><b>Added in:</b>
+      <keyword keyref="impala40"/></p>
+    <p><b>Related information:</b>
+      <xref href="impala_transparent_query_retries.xml"/></p>
+  </conbody>
+</concept>
diff --git a/docs/topics/impala_spool_query_results.xml b/docs/topics/impala_spool_query_results.xml
index beea3fb..9978687 100644
--- a/docs/topics/impala_spool_query_results.xml
+++ b/docs/topics/impala_spool_query_results.xml
@@ -52,7 +52,7 @@ under the License.
             results.</p></li>
       </ul></p>
     <p><b>Type:</b>
-      <codeph>INT</codeph></p>
+      <codeph>BOOLEAN</codeph></p>
     <p><b>Default:</b>
       <codeph>FALSE</codeph></p>
     <p><b>Added in:</b>
diff --git a/docs/topics/impala_transparent_query_retries.xml b/docs/topics/impala_transparent_query_retries.xml
new file mode 100644
index 0000000..05962a5
--- /dev/null
+++ b/docs/topics/impala_transparent_query_retries.xml
@@ -0,0 +1,56 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="transparent_query_retries">
+  <title>Impala Transparent Query Retries</title>
+  <conbody>
+    <p>Transparent query retries will automatically retry any queries that fail
+      due to cluster membership changes. A cluster membership change typically
+      entails a node leaving the cluster because it crashed or for some other
+      reason stopped responding to statestore heartbeats.</p>
+    <p>Traditionally, if a query runs on a node in the Impala cluster, and that
+      node crashes, then the query will fail and it is up to the user to retry
+      the query. With transparent query retries, the query will be automatically
+      retried.</p>
+    <ul>
+      <li>Queries are only retried if the query failed due to a cluster membership
+        change. Trivial failures, like SQL parsing exceptions, are <b>not</b>
+        retried.</li>
+      <li>Cluster membership changes fall into two categories: membership updates
+        from the statestore or node blacklisting events. <p>The statestore
+        periodically sends heartbeats to each impalad. If an impalad stops
+        responding to heartbeats, that impalad is removed from the cluster
+        membership.</p><p>Node blacklisting events occur when a query fails and, as
+        a result, an impalad in the cluster is added to the Coordinator's node
+        blacklist. In this scenario, the query is retried.</p></li>
+      <li>For most users, query retries will be completely transparent, but
+        users who want to know why a retry was necessary can use runtime profiles.
+        Each query attempt is modelled as a completely new query. Thus, each query
+        attempt has its own runtime profiles. Users can look through the profiles
+        of the failed query attempts to determine why the query was retried.</li>
+    </ul>
+    <p>Transparent query retries are turned off by default, but can be enabled via
+      the <codeph>RETRY_FAILED_QUERIES</codeph> query option.</p>
+    <section>
+      <p><b>Related information:</b>
+        <xref href="impala_retry_failed_queries.xml#RETRY_FAILED_QUERIES"/></p>
+    </section>
+  </conbody>
+</concept>

Reply via email to