This is an automated email from the ASF dual-hosted git repository.

michaelsmith pushed a commit to branch master
in repository https://gitbox.apache.org/repos/asf/impala.git


The following commit(s) were added to refs/heads/master by this push:
     new 93c64e7e9 IMPALA-13376: Add docs for AGG_MEM_CORRELATION_FACTOR etc
93c64e7e9 is described below

commit 93c64e7e9a771d72b860d1a6bdccc33d9f46f5ac
Author: Riza Suminto <[email protected]>
AuthorDate: Wed Sep 11 17:48:38 2024 -0700

    IMPALA-13376: Add docs for AGG_MEM_CORRELATION_FACTOR etc
    
    This patch adds documentation for the AGG_MEM_CORRELATION_FACTOR and
    LARGE_AGG_MEM_THRESHOLD options introduced in Apache Impala 4.4.0.
    
    IMPALA-12548 fixed the behavior of AGG_MEM_CORRELATION_FACTOR: a higher
    value lowers the memory estimate, while a lower value raises it. The
    documentation in ImpalaService.thrift, however, says the opposite. This
    patch fixes the documentation in the thrift file as well.
    
    Testing:
    - Run "make plain-html" in docs/ dir and confirm the output.
    - Manually check with comments in
      PlannerTest.testAggNodeMaxMemEstimate()
    
    Change-Id: I00956a50fb7616ca3c3ea2fd75fd11239a6bcd90
    Reviewed-on: http://gerrit.cloudera.org:8080/21793
    Tested-by: Impala Public Jenkins <[email protected]>
    Reviewed-by: Michael Smith <[email protected]>
---
 common/thrift/ImpalaService.thrift                |  2 +-
 docs/impala.ditamap                               |  9 +--
 docs/shared/ImpalaVariables.xml                   |  1 +
 docs/shared/impala_common.xml                     |  8 +++
 docs/topics/impala_agg_mem_correlation_factor.xml | 78 +++++++++++++++++++++++
 docs/topics/impala_large_agg_mem_threshold.xml    | 68 ++++++++++++++++++++
 6 files changed, 161 insertions(+), 5 deletions(-)

diff --git a/common/thrift/ImpalaService.thrift 
b/common/thrift/ImpalaService.thrift
index a3e4c1c66..e01b9c995 100644
--- a/common/thrift/ImpalaService.thrift
+++ b/common/thrift/ImpalaService.thrift
@@ -847,7 +847,7 @@ enum TImpalaQueryOptions {
   // value means there is high correlation between grouping expressions / 
columns, while
   // low value means there is low correlation between them. High correlation 
means
   // aggregation node can be scheduled with lower memory estimation (lower 
memScale).
-  // Setting value 1.0 will result in an equal memory estimate as the default 
estimation
+  // Setting value 0.0 will result in an equal memory estimate as the default 
estimation
   // (no change). Defaults to 0.5.
   AGG_MEM_CORRELATION_FACTOR = 163
 
diff --git a/docs/impala.ditamap b/docs/impala.ditamap
index e022243ba..c050ff04c 100644
--- a/docs/impala.ditamap
+++ b/docs/impala.ditamap
@@ -178,6 +178,7 @@ under the License.
         <topicref href="topics/impala_abort_on_error.xml"/>
         <topicref href="topics/impala_allow_erasure_coded_files.xml"/>
         <topicref href="topics/impala_allow_unsupported_formats.xml"/>
+        <topicref rev="4.4.0" 
href="topics/impala_agg_mem_correlation_factor.xml"/>
         <topicref href="topics/impala_appx_count_distinct.xml"/>
         <topicref href="topics/impala_batch_size.xml"/>
         <topicref href="topics/impala_broadcast_bytes_limit.xml"/>
@@ -202,19 +203,19 @@ under the License.
         <topicref href="topics/impala_exec_single_node_rows_threshold.xml"/>
         <topicref href="topics/impala_exec_time_limit_s.xml"/>
         <topicref rev="4.2.0" href="topics/impala_expand_complex_types.xml"/>
-        <topicref href="topics/impala_explain_level.xml">
-          <topicref rev="2.5.0" 
href="topics/impala_max_num_runtime_filters.xml"/>
-        </topicref>
+        <topicref href="topics/impala_explain_level.xml"/>
         <topicref href="topics/impala_fetch_rows_timeout_ms.xml"/>
-        <topicref href="topics/impala_join_rows_produced_limit.xml"/>
         <topicref href="topics/impala_hbase_cache_blocks.xml"/>
         <topicref href="topics/impala_hbase_caching.xml"/>
         <topicref href="topics/impala_idle_session_timeout.xml"/>
+        <topicref href="topics/impala_join_rows_produced_limit.xml"/>
         <topicref href="topics/impala_kudu_read_mode.xml"/>
+        <topicref rev="4.4.0" 
href="topics/impala_large_agg_mem_threshold.xml"/>
         <topicref href="topics/impala_live_progress.xml"/>
         <topicref href="topics/impala_live_summary.xml"/>
         <topicref href="topics/impala_max_errors.xml"/>
         <topicref rev="3.1 IMPALA-6847" 
href="topics/impala_max_mem_estimate_for_admission.xml"/>
+        <topicref rev="2.5.0" 
href="topics/impala_max_num_runtime_filters.xml"/>
         <topicref href="topics/impala_max_result_spooling_mem.xml"/>
         <topicref rev="2.10.0 IMPALA-3200" 
href="topics/impala_max_row_size.xml"/>
         <topicref href="topics/impala_max_scan_range_length.xml"/>
diff --git a/docs/shared/ImpalaVariables.xml b/docs/shared/ImpalaVariables.xml
index d7236f06d..cf25bc00e 100644
--- a/docs/shared/ImpalaVariables.xml
+++ b/docs/shared/ImpalaVariables.xml
@@ -42,6 +42,7 @@ under the License.
        The docs included with a distro can refer to the distro release number 
by
        editing the values here.
        <ul>
+        <li><ph id="impala44">Impala 4.4</ph></li>
         <li><ph id="impala40">Impala 4.0</ph></li>
         <li><ph id="impala34">Impala 3.4</ph></li>
         <li><ph id="impala33">Impala 3.3</ph></li>
diff --git a/docs/shared/impala_common.xml b/docs/shared/impala_common.xml
index 091f6c85d..b4c1792f5 100644
--- a/docs/shared/impala_common.xml
+++ b/docs/shared/impala_common.xml
@@ -2479,6 +2479,10 @@ PROFILE;
         <b>Type:</b> integer
       </p>
 
+      <p id="type_double">
+        <b>Type:</b> double
+      </p>
+
       <p id="default_blurb">
         <b>Default:</b>
       </p>
@@ -3432,6 +3436,10 @@ flight_num:           INT32 SNAPPY DO:83456393 
FPO:83488603 SZ:10216514/11474301
         needed to represent each value.
       </p>
 
+      <p rev="4.4.0" id="added_in_440">
+        <b>Added in:</b> <keyword keyref="impala44"/>
+      </p>
+
       <p rev="4.0.0" id="added_in_400">
         <b>Added in:</b> <keyword keyref="impala40"/>
       </p>
diff --git a/docs/topics/impala_agg_mem_correlation_factor.xml 
b/docs/topics/impala_agg_mem_correlation_factor.xml
new file mode 100644
index 000000000..d9a27f7d1
--- /dev/null
+++ b/docs/topics/impala_agg_mem_correlation_factor.xml
@@ -0,0 +1,78 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="agg_mem_correlation_factor" rev="4.4.0">
+
+  <title>AGG_MEM_CORRELATION_FACTOR Query Option (<keyword keyref="impala44"/> 
or higher only)</title>
+  <titlealts audience="PDF">
+    <navtitle>AGG MEM CORRELATION FACTOR</navtitle>
+  </titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p rev="4.4.0">
+      <indexterm audience="hidden">AGG_MEM_CORRELATION_FACTOR query 
option</indexterm>
+      Default correlation factor between two or more grouping columns in an 
aggregation node.
+      When grouping over multiple columns, the query planner uses this 
value to reason
+      about how correlated the columns are. A value close to 1.0 means the columns 
are highly
+      correlated, while 0.0 means no correlation. In popular RDBMSs, this 
column correlation
+      can usually be measured by using the <codeph>CORR</codeph> function.
+    </p>
+    <p>
+      If both <codeph>AGG_MEM_CORRELATION_FACTOR</codeph> and
+      <codeph>LARGE_AGG_MEM_THRESHOLD</codeph> are set to values larger than 0, 
the planner will
+      switch the memory estimation for an aggregation node from the NDV 
multiplication-based
+      algorithm to a correlation-based memory estimation that should yield a lower
+      estimate. Setting a high <codeph>AGG_MEM_CORRELATION_FACTOR</codeph> 
will result in
+      lower memory estimation, but no less than
+      <codeph>LARGE_AGG_MEM_THRESHOLD</codeph>. Setting a low value will 
result in higher
+      memory estimation, but will not exceed the default NDV 
multiplication-based
+      estimation.
+    </p>
+    <p>
+      Users can set this option value to 0.0 so the planner keeps using the 
default
+      NDV multiplication-based estimation.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/type_double"/>
+
+    <p conref="../shared/impala_common.xml#common/default_blurb"/>
+    <p>
+      <codeph>0.5</codeph>
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/added_in_440"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+    <p>
+      <xref href="impala_large_agg_mem_threshold.xml"/>
+    </p>
+
+  </conbody>
+</concept>
diff --git a/docs/topics/impala_large_agg_mem_threshold.xml 
b/docs/topics/impala_large_agg_mem_threshold.xml
new file mode 100644
index 000000000..33debbd89
--- /dev/null
+++ b/docs/topics/impala_large_agg_mem_threshold.xml
@@ -0,0 +1,68 @@
+<?xml version="1.0" encoding="UTF-8"?>
+<!--
+Licensed to the Apache Software Foundation (ASF) under one
+or more contributor license agreements.  See the NOTICE file
+distributed with this work for additional information
+regarding copyright ownership.  The ASF licenses this file
+to you under the Apache License, Version 2.0 (the
+"License"); you may not use this file except in compliance
+with the License.  You may obtain a copy of the License at
+
+  http://www.apache.org/licenses/LICENSE-2.0
+
+Unless required by applicable law or agreed to in writing,
+software distributed under the License is distributed on an
+"AS IS" BASIS, WITHOUT WARRANTIES OR CONDITIONS OF ANY
+KIND, either express or implied.  See the License for the
+specific language governing permissions and limitations
+under the License.
+-->
+<!DOCTYPE concept PUBLIC "-//OASIS//DTD DITA Concept//EN" "concept.dtd">
+<concept id="large_agg_mem_threshold" rev="4.4.0">
+
+  <title>LARGE_AGG_MEM_THRESHOLD Query Option (<keyword keyref="impala44"/> or 
higher only)</title>
+  <titlealts audience="PDF">
+    <navtitle>LARGE AGG MEM THRESHOLD</navtitle>
+  </titlealts>
+  <prolog>
+    <metadata>
+      <data name="Category" value="Impala"/>
+      <data name="Category" value="Impala Query Options"/>
+      <data name="Category" value="Performance"/>
+      <data name="Category" value="Developers"/>
+      <data name="Category" value="Data Analysts"/>
+    </metadata>
+  </prolog>
+
+  <conbody>
+
+    <p rev="4.4.0">
+      <indexterm audience="hidden">LARGE_AGG_MEM_THRESHOLD query 
option</indexterm>
+      The threshold for the query planner to determine whether memory 
estimation for an
+      aggregation node is large or not. Together with 
<codeph>AGG_MEM_CORRELATION_FACTOR</codeph>,
+      the planner will use this value to lower memory estimation for a large 
aggregation node.
+    </p>
+    <p>
+      Users can set this option value to 0 so the planner keeps using the 
default
+      NDV multiplication-based estimation. See the 
<codeph>AGG_MEM_CORRELATION_FACTOR</codeph>
+      documentation for more details.
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/type_integer"/>
+
+    <p conref="../shared/impala_common.xml#common/default_blurb"/>
+    <p>
+      <codeph>536870912</codeph> (512 MB)
+    </p>
+
+    <p conref="../shared/impala_common.xml#common/units_blurb"/>
+
+    <p conref="../shared/impala_common.xml#common/added_in_440"/>
+
+    <p conref="../shared/impala_common.xml#common/related_info"/>
+    <p>
+      <xref href="impala_agg_mem_correlation_factor.xml"/>
+    </p>
+
+  </conbody>
+</concept>

Reply via email to