This is an automated email from the ASF dual-hosted git repository.

todd pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git

commit e21764b912b3118ee62defc07a568aa7e84790e5
Author: Alex Rodoni <arod...@cloudera.com>
AuthorDate: Tue May 7 16:27:03 2019 -0700

    IMPALA-8116: [DOCS] A new doc for Impala Scaling Limits
    
    - Listed the known/tested SCALING Limits.
    - Unknown limits are marked hidden for now. When the numbers
    are available, will remove the hidden tag.
    
    Change-Id: Ie6df672e5de1fb2d34f6b78524e8f20e85ea34fb
    Reviewed-on: http://gerrit.cloudera.org:8080/13277
    Tested-by: Impala Public Jenkins <impala-public-jenk...@cloudera.com>
    Reviewed-by: Tim Armstrong <tarmstr...@cloudera.com>
---
 docs/impala.ditamap                   |   1 +
 docs/topics/impala_scaling_limits.xml | 364 ++++++++++++++++++++++++++++++++++
 2 files changed, 365 insertions(+)

diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index 2468e4c..ed69762 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -297,6 +297,7 @@ under the License.
     <topicref audience="hidden" href="topics/impala_perf_ddl.xml"/>
   </topicref>
   <topicref href="topics/impala_scalability.xml">
+    <topicref href="topics/impala_scaling_limits.xml"/>
     <topicref href="topics/impala_dedicated_coordinator.xml"/>
     <topicref href="topics/impala_metadata.xml"/>
   </topicref>
diff --git a/docs/topics/impala_scaling_limits.xml 
b/docs/topics/impala_scaling_limits.xml
new file mode 100644
index 0000000..ba82406
--- /dev/null
+++ b/docs/topics/impala_scaling_limits.xml
@@ -0,0 +1,364 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="impala_scaling_limits">
+
+  <title>Scaling Limits and Guidelines</title>
+
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Scalability"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p>
+      This topic lists the <i>scalability</i> limitations in Impala. For a 
given functional
+      feature, it is recommended that you respect these limitations to achieve 
optimal
+      scalability and performance. For example, although you might be able to create a 
table with 2000
+      columns, you will experience performance problems while querying the 
table. This topic
+      does not cover functional limitations in Impala.
+    </p>
+
+    <p>
+      Unless noted otherwise, the limits were tested and certified.
+    </p>
+
+    <p>
+      The limits noted as "<i>generally safe</i>" are not certified but are 
recommended as
+      generally safe. A safe range is not a hard limit as unforeseen errors or 
troubles in your
+      particular environment can affect the range.
+    </p>
+
+    <p outputclass="toc inpage"/>
+
+  </conbody>
+
+  <concept id="deployment_limits">
+
+    <title>Deployment Limits</title>
+
+    <conbody>
+
+      <ul>
+        <li>
+          Number of Impalad Executors
+          <ul>
+            <li>
+              80 nodes in CDH 5.14 and lower
+            </li>
+
+            <li>
+              150 nodes in CDH 5.15 and higher
+            </li>
+          </ul>
+        </li>
+
+        <li>
+          Number of Impalad Coordinators: 1 coordinator for at most every 50 
executors
+          <p>
+            See
+            <xref
+              
href="https://www.cloudera.com/documentation/enterprise/6/6.2/topics/impala_dedicated_coordinator.html#concept_vhv_4b1_n2b"
+              format="html" scope="external">Dedicated
+            Coordinators</xref> for details.
+          </p>
+        </li>
+
+        <li audience="hidden">
+          Max memory
+        </li>
+
+        <li audience="hidden">
+          Max number of CPU cores
+        </li>
+
+        <li audience="hidden">
+          Max number of disks
+        </li>
+      </ul>
+
+      <ul>
+        <li>
+          The number of Impala clusters per deployment
+          <ul>
+            <li>
+              1 Impala cluster in Impala 3.1 and lower
+            </li>
+
+            <li>
+              Multiple clusters in Impala 3.2 and higher are <i>generally 
safe</i>.
+            </li>
+          </ul>
+        </li>
+      </ul>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="data_storage_limits">
+
+    <title>Data Storage Limits</title>
+
+    <conbody>
+
+      <p>
+        There are no hard limits for the following, but you will experience 
gradual performance
+        degradation as you increase these numbers.
+      </p>
+
+      <ul>
+        <li>
+          Number of databases
+        </li>
+
+        <li>
+          Number of tables - total, per database
+        </li>
+
+        <li>
+          Number of partitions - total, per table
+        </li>
+
+        <li>
+          Number of files - total, per table, per table per partition
+        </li>
+
+        <li>
+          Number of views - total, per database
+        </li>
+
+        <li>
+          Number of user-defined functions - total, per database
+        </li>
+
+        <li>
+          Parquet
+          <ul>
+            <li>
+              Number of columns per row group
+            </li>
+
+            <li>
+              Number of row groups per block
+            </li>
+
+            <li>
+              Number of HDFS blocks per file
+            </li>
+          </ul>
+        </li>
+      </ul>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="schema_design_limits">
+
+    <title>Schema Design Limits</title>
+
+    <conbody>
+
+      <ul>
+        <li>
+          Number of columns
+          <ul>
+            <li>
+              300 for Kudu tables
+              <p>
+                See
+                <xref
+                  
href="https://www.cloudera.com/documentation/enterprise/latest/topics/kudu_limitations.html"
+                  format="html" scope="external">Kudu
+                Usage Limitations</xref> for more information.
+              </p>
+            </li>
+
+            <li>
+              1000 for other types of tables
+            </li>
+          </ul>
+        </li>
+
+        <li audience="hidden">
+          Table and column name length
+        </li>
+
+        <li audience="hidden">
+          Maximum cell size
+        </li>
+      </ul>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="security_limits">
+
+    <title>Security Limits</title>
+
+    <conbody>
+
+      <ul>
+        <li>
+          Number of roles: 10,000 for Sentry
+        </li>
+
+        <li audience="hidden">
+          Number of columns used in column level ACL
+        </li>
+      </ul>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="ddl_limits" audience="hidden">
+
+    <title>Ingestion and DDL Limits</title>
+
+    <conbody>
+
+      <ul>
+        <li>
+          Number of DDL operations per minute
+        </li>
+
+        <li>
+          Number of concurrent DDL operations
+        </li>
+      </ul>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="query_compile_limits">
+
+    <title>Query Limits - Compile Time</title>
+
+    <conbody>
+
+      <ul>
+        <li>
+          Maximum number of columns in a query, included in a 
<codeph>SELECT</codeph> list,
+          <codeph>INSERT</codeph>, and in an expression: no limit
+        </li>
+
+        <li>
+          Number of tables referenced: no limit
+        </li>
+
+        <li>
+          Number of plan nodes: no limit
+        </li>
+
+        <li>
+          Number of plan fragments: no limit
+        </li>
+
+        <li>
+          Depth of expression tree: 1000 hard limit
+        </li>
+
+        <li>
+          Width of expression tree: 10,000 hard limit
+        </li>
+      </ul>
+
+    </conbody>
+
+  </concept>
+
+  <concept id="query_runtime_limits">
+
+    <title>Query Limits - Runtime</title>
+
+    <conbody>
+
+      <ul>
+        <li audience="hidden">
+          Number of fragments and fragment instances
+        </li>
+
+        <li>
+          Codegen
+          <ul>
+            <li>
+              Very deeply nested expressions within queries can exceed 
internal Impala limits,
+              leading to excessive memory usage. Setting the query option
+              <codeph>disable_codegen=true</codeph> may reduce the impact, at 
a cost of longer
+              query runtime.
+            </li>
+          </ul>
+        </li>
+
+        <li audience="hidden">
+          Runtime Filter
+          <ul>
+            <li>
+              Maximum number of filters
+            </li>
+
+            <li>
+              Max filter size
+            </li>
+          </ul>
+        </li>
+
+        <li audience="hidden">
+          Query Operators
+          <ul>
+            <li>
+              Scan
+            </li>
+
+            <li>
+              Join
+            </li>
+
+            <li>
+              Exchange
+            </li>
+
+            <li>
+              Agg
+            </li>
+
+            <li>
+              Sort
+            </li>
+
+            <li>
+              Merge
+            </li>
+          </ul>
+        </li>
+      </ul>
+
+    </conbody>
+
+  </concept>
+
+</concept>

Reply via email to