HAWQ-936. Add GUC for array expansion in ORCA optimizer Consider the query with the following pattern 'select * from foo where foo.a IN (1,2,3,...)'. Currently, when the number of constants in the IN subquery is large, the query optimization time is unacceptable. This is stopping customers from turning Orca on by default since many of the queries are generated queries with such a pattern. The root cause is due to the expansion of the IN subquery into an expression in disjunctive normal form. The objective of this patch is to add a guc parameter 'optimizer_array_expansion_threshold' to control the maximum number of array elements in IN array list. By default, the GUC value is set to 25, which means if the array size is larger than 25, the array expansion is disabled.
Project: http://git-wip-us.apache.org/repos/asf/incubator-hawq/repo Commit: http://git-wip-us.apache.org/repos/asf/incubator-hawq/commit/950065bb Tree: http://git-wip-us.apache.org/repos/asf/incubator-hawq/tree/950065bb Diff: http://git-wip-us.apache.org/repos/asf/incubator-hawq/diff/950065bb Branch: refs/heads/master Commit: 950065bb0db627992be6b96afb4671b3c5ab1c90 Parents: eae0585 Author: Haisheng Yuan and Lubomir Petrov <[email protected]> Authored: Mon Jul 18 17:57:51 2016 -0700 Committer: Ming LI <[email protected]> Committed: Mon Aug 1 10:42:07 2016 +0800 ---------------------------------------------------------------------- depends/thirdparty/gporca.commit | 2 +- src/backend/gpopt/ivy.xml | 2 +- src/backend/gpopt/utils/COptTasks.cpp | 5 ++++- src/backend/utils/misc/guc.c | 14 ++++++++++++-- src/include/utils/guc.h | 5 +++-- 5 files changed, 21 insertions(+), 7 deletions(-) ---------------------------------------------------------------------- http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/depends/thirdparty/gporca.commit ---------------------------------------------------------------------- diff --git a/depends/thirdparty/gporca.commit b/depends/thirdparty/gporca.commit index d4192e1..cf4eaaa 100644 --- a/depends/thirdparty/gporca.commit +++ b/depends/thirdparty/gporca.commit @@ -1 +1 @@ -https://github.com/greenplum-db/gporca.git master 03be7066f58f2b3bd0ab5f866458ea526be96494 +https://github.com/greenplum-db/gporca.git master c5e40f283703b5fa4c2eb40f367ab7c1b1ab4d0d http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/src/backend/gpopt/ivy.xml ---------------------------------------------------------------------- diff --git a/src/backend/gpopt/ivy.xml b/src/backend/gpopt/ivy.xml index 4d86feb..045c717 100644 --- a/src/backend/gpopt/ivy.xml +++ b/src/backend/gpopt/ivy.xml @@ -38,7 +38,7 @@ under the License. </configurations> <dependencies> - <dependency org="emc" name="optimizer" rev="1.637" conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" /> + <dependency org="emc" name="optimizer" rev="1.638" conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" /> <dependency org="emc" name="libgpos" rev="1.137" conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" /> <dependency org="xerces" name="xerces-c" rev="3.1.1-p1" conf="osx106_x86->osx106_x86_32;osx106_x86_32->osx106_x86_32;rhel5_x86_64->rhel5_x86_64;suse10_x86_64->suse10_x86_64" /> </dependencies> http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/src/backend/gpopt/utils/COptTasks.cpp ---------------------------------------------------------------------- diff --git a/src/backend/gpopt/utils/COptTasks.cpp b/src/backend/gpopt/utils/COptTasks.cpp index 8511f3e..03de99e 100644 --- a/src/backend/gpopt/utils/COptTasks.cpp +++ b/src/backend/gpopt/utils/COptTasks.cpp @@ -764,6 +764,7 @@ COptTasks::PoconfCreate ULONG ulCTEInliningCutoff = (ULONG) optimizer_cte_inlining_bound; ULONG ulPartsToForceSortOnInsert = (ULONG) optimizer_parts_to_force_sort_on_insert; ULONG ulJoinArityForAssociativityCommutativity = (ULONG) optimizer_join_arity_for_associativity_commutativity; + ULONG ulArrayExpansionThreshold = (ULONG) optimizer_array_expansion_threshold; return GPOS_NEW(pmp) COptimizerConfig ( @@ -771,7 +772,9 @@ COptTasks::PoconfCreate GPOS_NEW(pmp) CStatisticsConfig(pmp, dDampingFactorFilter, dDampingFactorJoin, dDampingFactorGroupBy), GPOS_NEW(pmp) CCTEConfig(ulCTEInliningCutoff), pcm, - GPOS_NEW(pmp) CHint(ulPartsToForceSortOnInsert /* optimizer_parts_to_force_sort_on_insert */, ulJoinArityForAssociativityCommutativity) + GPOS_NEW(pmp) CHint(ulPartsToForceSortOnInsert /* optimizer_parts_to_force_sort_on_insert */, + ulJoinArityForAssociativityCommutativity, + ulArrayExpansionThreshold) ); } http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/src/backend/utils/misc/guc.c ---------------------------------------------------------------------- diff --git a/src/backend/utils/misc/guc.c b/src/backend/utils/misc/guc.c index 48e09f2..4bd2aa5 100644 --- a/src/backend/utils/misc/guc.c +++ b/src/backend/utils/misc/guc.c @@ -741,6 +741,9 @@ double optimizer_damping_factor_filter; double optimizer_damping_factor_join; double optimizer_damping_factor_groupby; int optimizer_segments; +int optimizer_parts_to_force_sort_on_insert; +int optimizer_join_arity_for_associativity_commutativity; +int optimizer_array_expansion_threshold; bool optimizer_analyze_root_partition; bool optimizer_analyze_midlevel_partition; bool optimizer_enable_constant_expression_evaluation; @@ -760,8 +763,6 @@ bool optimizer_multilevel_partitioning; bool optimizer_enable_derive_stats_all_groups; bool optimizer_explain_show_status; bool optimizer_prefer_scalar_dqa_multistage_agg; -int optimizer_parts_to_force_sort_on_insert; -int optimizer_join_arity_for_associativity_commutativity; /* Security */ bool gp_reject_internal_tcp_conn = true; @@ -6124,6 +6125,15 @@ static struct config_int ConfigureNamesInt[] = 43200000, 0, INT_MAX, NULL, NULL }, { + {"optimizer_array_expansion_threshold", PGC_USERSET, QUERY_TUNING_METHOD, + gettext_noop("Item limit for expansion of arrays in WHERE clause to disjunctive form."), + NULL, + GUC_NO_SHOW_ALL | GUC_NOT_IN_SAMPLE + }, + &optimizer_array_expansion_threshold, + 25, 0, INT_MAX, NULL, NULL + }, + { {"memory_profiler_dataset_size", PGC_USERSET, DEVELOPER_OPTIONS, gettext_noop("Set the size in GB"), NULL, http://git-wip-us.apache.org/repos/asf/incubator-hawq/blob/950065bb/src/include/utils/guc.h ---------------------------------------------------------------------- diff --git a/src/include/utils/guc.h b/src/include/utils/guc.h index 337d265..67183af 100644 --- a/src/include/utils/guc.h +++ b/src/include/utils/guc.h @@ -416,6 +416,9 @@ extern double optimizer_damping_factor_filter; extern double optimizer_damping_factor_join; extern double optimizer_damping_factor_groupby; extern int optimizer_segments; +extern int optimizer_parts_to_force_sort_on_insert; +extern int optimizer_join_arity_for_associativity_commutativity; +extern int optimizer_array_expansion_threshold; extern bool optimizer_analyze_root_partition; extern bool optimizer_analyze_midlevel_partition; extern bool optimizer_enable_constant_expression_evaluation; @@ -436,8 +439,6 @@ extern bool optimizer_multilevel_partitioning; extern bool optimizer_enable_derive_stats_all_groups; extern bool optimizer_explain_show_status; extern bool optimizer_prefer_scalar_dqa_multistage_agg; -extern int optimizer_parts_to_force_sort_on_insert; -extern int optimizer_join_arity_for_associativity_commutativity; /** * Enable logging of DPE match in optimizer.
